gnu: emacs-sly: Update to 20200228.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;;
19 ;;; This file is part of GNU Guix.
20 ;;;
21 ;;; GNU Guix is free software; you can redistribute it and/or modify it
22 ;;; under the terms of the GNU General Public License as published by
23 ;;; the Free Software Foundation; either version 3 of the License, or (at
24 ;;; your option) any later version.
25 ;;;
26 ;;; GNU Guix is distributed in the hope that it will be useful, but
27 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
28 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 ;;; GNU General Public License for more details.
30 ;;;
31 ;;; You should have received a copy of the GNU General Public License
32 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
33
34 (define-module (gnu packages bioinformatics)
35 #:use-module ((guix licenses) #:prefix license:)
36 #:use-module (guix packages)
37 #:use-module (guix utils)
38 #:use-module (guix download)
39 #:use-module (guix git-download)
40 #:use-module (guix hg-download)
41 #:use-module (guix build-system ant)
42 #:use-module (guix build-system gnu)
43 #:use-module (guix build-system cmake)
44 #:use-module (guix build-system haskell)
45 #:use-module (guix build-system meson)
46 #:use-module (guix build-system ocaml)
47 #:use-module (guix build-system perl)
48 #:use-module (guix build-system python)
49 #:use-module (guix build-system r)
50 #:use-module (guix build-system ruby)
51 #:use-module (guix build-system scons)
52 #:use-module (guix build-system trivial)
53 #:use-module (gnu packages)
54 #:use-module (gnu packages autotools)
55 #:use-module (gnu packages algebra)
56 #:use-module (gnu packages base)
57 #:use-module (gnu packages bash)
58 #:use-module (gnu packages bison)
59 #:use-module (gnu packages bioconductor)
60 #:use-module (gnu packages boost)
61 #:use-module (gnu packages check)
62 #:use-module (gnu packages code)
63 #:use-module (gnu packages compression)
64 #:use-module (gnu packages cpio)
65 #:use-module (gnu packages cran)
66 #:use-module (gnu packages curl)
67 #:use-module (gnu packages documentation)
68 #:use-module (gnu packages databases)
69 #:use-module (gnu packages datastructures)
70 #:use-module (gnu packages file)
71 #:use-module (gnu packages flex)
72 #:use-module (gnu packages gawk)
73 #:use-module (gnu packages gcc)
74 #:use-module (gnu packages gd)
75 #:use-module (gnu packages gtk)
76 #:use-module (gnu packages glib)
77 #:use-module (gnu packages graph)
78 #:use-module (gnu packages groff)
79 #:use-module (gnu packages guile)
80 #:use-module (gnu packages guile-xyz)
81 #:use-module (gnu packages haskell-check)
82 #:use-module (gnu packages haskell-web)
83 #:use-module (gnu packages haskell-xyz)
84 #:use-module (gnu packages image)
85 #:use-module (gnu packages imagemagick)
86 #:use-module (gnu packages java)
87 #:use-module (gnu packages java-compression)
88 #:use-module (gnu packages jemalloc)
89 #:use-module (gnu packages dlang)
90 #:use-module (gnu packages linux)
91 #:use-module (gnu packages lisp-xyz)
92 #:use-module (gnu packages logging)
93 #:use-module (gnu packages machine-learning)
94 #:use-module (gnu packages man)
95 #:use-module (gnu packages maths)
96 #:use-module (gnu packages mpi)
97 #:use-module (gnu packages ncurses)
98 #:use-module (gnu packages ocaml)
99 #:use-module (gnu packages pcre)
100 #:use-module (gnu packages parallel)
101 #:use-module (gnu packages pdf)
102 #:use-module (gnu packages perl)
103 #:use-module (gnu packages perl-check)
104 #:use-module (gnu packages pkg-config)
105 #:use-module (gnu packages popt)
106 #:use-module (gnu packages protobuf)
107 #:use-module (gnu packages python)
108 #:use-module (gnu packages python-compression)
109 #:use-module (gnu packages python-science)
110 #:use-module (gnu packages python-web)
111 #:use-module (gnu packages python-xyz)
112 #:use-module (gnu packages readline)
113 #:use-module (gnu packages ruby)
114 #:use-module (gnu packages serialization)
115 #:use-module (gnu packages shells)
116 #:use-module (gnu packages sphinx)
117 #:use-module (gnu packages statistics)
118 #:use-module (gnu packages swig)
119 #:use-module (gnu packages tbb)
120 #:use-module (gnu packages tex)
121 #:use-module (gnu packages texinfo)
122 #:use-module (gnu packages textutils)
123 #:use-module (gnu packages time)
124 #:use-module (gnu packages tls)
125 #:use-module (gnu packages vim)
126 #:use-module (gnu packages web)
127 #:use-module (gnu packages xml)
128 #:use-module (gnu packages xorg)
129 #:use-module (srfi srfi-1)
130 #:use-module (ice-9 match))
131
;; ARAGORN is distributed as one versioned C file plus a manual page, so the
;; standard configure/build/install phases are replaced by hand-written ones.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Compile the versioned C source file directly with gcc.
         (replace 'build
           (lambda _
             (let ((c-source (string-append "aragorn" ,version ".c")))
               (invoke "gcc" "-O3" "-ffast-math" "-finline-functions"
                       "-o" "aragorn" c-source))
             #t))
         ;; Copy the executable and its manual page into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (install-file "aragorn" (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1")))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
176
;; BamM: Python 2 wrapper around a C library for BAM files.  The C part is
;; configured through autotools inside the "c" sub-directory; the test suite
;; is run after installation so that compilation happens only once.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM.git")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         ;; Make the build-time PATH available to the installed "bamm"
         ;; script.
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; Put the freshly installed site-packages directory in front
             ;; of PYTHONPATH.  The directory used to be derived by slicing
             ;; the last five characters of the python store file name,
             ;; which only yields "X.Y" when the version is exactly five
             ;; characters long ("2.7.9" works, "2.7.16" gives ".7.").
             ;; The build system helper derives the version properly and
             ;; also copes with an unset PYTHONPATH.
             (add-installed-pythonpath inputs outputs)
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (invoke "nosetests")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)  ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
271
;; BamTools: C++ library and command-line tools for BAM files, built with
;; CMake.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Pass an rpath for the package's own lib/bamtools directory to
         ;; the linker through LDFLAGS.
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (ldflags (string-append "-Wl,-rpath="
                                            prefix "/lib/bamtools")))
               (setenv "LDFLAGS" ldflags))
             #t)))))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
305
;; BCFtools: VCF/BCF utilities.  The bundled htslib copy is removed from the
;; source and GSL support is enabled at configure time.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags (list "--enable-libgsl")
       #:phases
       (modify-phases %standard-phases
         ;; The test driver refers to /bin/bash; point it at the bash found
         ;; in PATH instead.
         (add-before 'check 'patch-tests
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
349
;; BEDOPS: genomic feature operations.  The package builds its in-tree,
;; patched copies of jansson, zlib and bzip2 (see the FIXME below).
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         (list "jansson-2.6.tar.bz2"
                               "zlib-1.2.7.tar.bz2"
                               "bzip2-1.0.6.tar.bz2")))
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
409
;; bedtools: genome arithmetic utilities.  There is no configure script; the
;; installation prefix is handed to make directly.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.29.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
445
;; Later releases of bedtools produce files with more columns than
;; what Ribotaper expects.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy every file found under the build tree's "bin" directory
         ;; into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin/"))
                   (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))))
472
;; Needed for pybedtools.
(define-public bedtools-2.26
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.26.0")
    ;; Only the source differs from the inherited bedtools definition.
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0jhavwifnf7lmkb11h9y7dynr8d699h0rd2l52j1pfgircr2zwv5"))))))
486
;; pbbam: PacBio BAM library and tools, built with Meson.  The test suite is
;; currently disabled (see the TODO below).
(define-public pbbam
  (package
    (name "pbbam")
    (version "0.23.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: tests/pbbam_test cannot be linked
       ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
       ;;   undefined reference to symbol '_ZTIN7testing4TestE'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; It doesn't find gtest_main because there's no pkg-config file
             ;; for it.  Find it another way.
             (let ((gtest (assoc-ref inputs "googletest")))
               (substitute* "tests/meson.build"
                 (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f
                          (string-append
                           "cpp = meson.get_compiler('cpp')\n"
                           "pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n")
                          gtest))))
             #t)))))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
542
;; blasr_libcpp: support libraries for PacBio sequence analysis, built with
;; Meson.  The test suite is currently disabled (see the TODO below).
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr_libcpp.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: ;; unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;;   undefined reference to symbol
       ;;   '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Prepend explicit lookups of the hdf5 and hdf5_cpp libraries to
         ;; the dependency list in meson.build.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((hdf5-dir (assoc-ref inputs "hdf5"))
                    (lookups
                     (format #f
                             (string-append
                              "cpp.find_library('hdf5', dirs : '~a'), "
                              "cpp.find_library('hdf5_cpp', dirs : '~a'), ")
                             hdf5-dir hdf5-dir)))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" opening)
                  (string-append opening lookups))))
             #t))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; It doesn't find gtest_main because there's no pkg-config file
             ;; for it.  Find it another way.
             (let ((gtest (assoc-ref inputs "googletest")))
               (substitute* "unittest/meson.build"
                 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f
                          (string-append
                           "cpp = meson.get_compiler('cpp')\n"
                           "libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n")
                          gtest))))
             #t)))))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("boost" ,boost)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
604
;; BLASR: PacBio long read aligner, built with Meson against blasr-libcpp,
;; pbbam and HDF5.
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
    (build-system meson-build-system)
    (arguments
     `(;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Prepend explicit lookups of the hdf5 and hdf5_cpp libraries to
         ;; the dependency list in meson.build.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((hdf5-dir (assoc-ref inputs "hdf5"))
                    (lookups
                     (format #f
                             (string-append
                              "cpp.find_library('hdf5', dirs : '~a'), "
                              "cpp.find_library('hdf5_cpp', dirs : '~a'), ")
                             hdf5-dir hdf5-dir)))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" opening)
                  (string-append opening lookups))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("boost" ,boost)
       ("blasr-libcpp" ,blasr-libcpp)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
649
;; RiboTaper: pipeline for finding translated ORFs.  Its shell scripts are
;; wrapped so that they see the build-time R_LIBS_SITE.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir     (string-append (assoc-ref outputs "out")
                                              "/bin/"))
                   (r-site-lib (getenv "R_LIBS_SITE")))
               ;; Set R_LIBS_SITE to exactly the value seen at build time
               ;; for each of the installed entry points.
               (for-each
                (lambda (script)
                  (wrap-program (string-append bindir script)
                    (list "R_LIBS_SITE" ":" '= (list r-site-lib))))
                (list "create_annotations_files.bash"
                      "create_metaplots.bash"
                      "Ribotaper_ORF_find.sh"
                      "Ribotaper.sh")))
             #t)))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
694
;; RiboDiff: Python 2 tool for detecting translation efficiency changes.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ratschlab/RiboDiff.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Right after the "packages=" line, add a "scripts=" entry
             ;; carrying the same leading text as that line.
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole-line leading)
                (string-append whole-line "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
739
;; Bioawk: awk with bioinformatics extensions.  Built with plain make and
;; installed by hand.
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (man-dir (string-append prefix "/share/man/man1")))
               ;; The manual page is shipped as "awk.1"; install it under
               ;; the name "bioawk.1".
               (mkdir-p man-dir)
               (copy-file "awk.1" (string-append man-dir "/bioawk.1"))
               (install-file "bioawk" (string-append prefix "/bin")))
             #t)))))
    (native-inputs
     `(("bison" ,bison)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
783
;; pybedtools: Python wrapper for BEDTools.  The Cython-generated C/C++ files
;; are regenerated at build time, and the test suite is run from a copy
;; outside the source tree.
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.8.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1xl454ijvd4dzfvqgfahad49b49j7qy710fq9xh1rvk42z6x5ssf"))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-1)
                  (srfi srfi-26)
                  (guix build utils)
                  (guix build python-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; See https://github.com/daler/pybedtools/issues/261
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; This test (pybedtools.test.test_scripts.test_venn_mpl) needs a
             ;; graphical environment.
             (substitute* "pybedtools/test/test_scripts.py"
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             #t))
         ;; TODO: Remove phase after it's part of PYTHON-BUILD-SYSTEM.
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; CYTHON-SOURCES holds the .pyx file names without their
             ;; four-character ".pyx" suffix; a C/C++ file with the same
             ;; stem is considered generated and is deleted.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files))
               #t)))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         ;; Run the tests against the built (not yet installed) package.
         ;; See https://github.com/daler/pybedtools/issues/192
         (replace 'check
           (lambda _
             (let* ((build-root-directory (string-append (getcwd) "/build/"))
                    (entries (scandir build-root-directory))
                    ;; First build sub-directory whose name starts with
                    ;; "lib", respectively "scripts".
                    (build (string-append
                            build-root-directory
                            (find (cut string-prefix? "lib" <>) entries)))
                    (scripts (string-append
                              build-root-directory
                              (find (cut string-prefix? "scripts" <>)
                                    entries))))
               (setenv "PYTHONPATH"
                       (string-append build ":" (getenv "PYTHONPATH")))
               ;; Executable scripts such as 'intron_exon_reads.py' must be
               ;; available in the PATH.
               (setenv "PATH"
                       (string-append scripts ":" (getenv "PATH"))))
             ;; The tests need to be run from elsewhere...
             (mkdir-p "/tmp/test")
             (copy-recursively "pybedtools/test" "/tmp/test")
             (with-directory-excursion "/tmp/test"
               (invoke "pytest")))))))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("kentutils" ,kentutils)         ; for bedGraphToBigWig
       ("python-six" ,python-six)
       ;; For the test suite.
       ("python-pytest" ,python-pytest)
       ("python-psutil" ,python-psutil)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
886
;; Python 2 counterpart of python-pybedtools, obtained by rewriting the
;; package with 'package-with-python2'.
(define-public python2-pybedtools
  (let ((python-variant python-pybedtools))
    (package-with-python2 python-variant)))
889
;; Library and command-line utilities for the Biological Observation Matrix
;; (BIOM) file format.  The Python 2 counterpart is advertised through the
;; 'python2-variant' property at the end of the definition.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; The PyPI distribution does not ship the test data, so the sources
       ;; come from GitHub: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the pre-generated C files; the 'use-cython phase below
           ;; sets USE_CYTHON so that the build regenerates them.
           ;; NOTE(review): the pattern is not anchored with "$", so it also
           ;; matches file names that merely contain ".c" -- confirm that this
           ;; is intended before tightening it.
           (for-each delete-file (find-files "." "\\.c"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-cython
           (lambda _
             (setenv "USE_CYTHON" "1")
             #t))
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Prepend a "skip" decorator, at the same indentation as the
             ;; method definition, to each test that is disabled.
             (let ((skip-decorator "@npt.dec.skipif(True, msg='Guix')\n"))
               (substitute* "biom/tests/test_cli/test_validate_table.py"
                 (("^(.+)def test_invalid_hdf5" definition indentation)
                  (string-append indentation skip-decorator definition)))
               (substitute* "biom/tests/test_table.py"
                 (("^(.+)def test_from_hdf5_issue_731" definition indentation)
                  (string-append indentation skip-decorator definition)))
               #t)))
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The 'reset-gzip-timestamps phase runs right after this one;
             ;; give the installed gzip files mode 644 first.
             (define (make-writable file)
               (chmod file #o644))
             (for-each make-writable
                       (find-files (assoc-ref outputs "out") "\\.gz"))
             #t)))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    (properties `((python2-variant . ,(delay python2-biom-format))))))
958
;; Python 2 variant of python-biom-format.  It reuses the inherited build
;; phases and adds one that drops the "pyqi" requirement from setup.py.
(define-public python2-biom-format
  (let* ((python3-package (strip-python2-variant python-biom-format))
         (python2-package (package-with-python2 python3-package)))
    (package
      (inherit python2-package)
      (arguments
       (substitute-keyword-arguments (package-arguments python2-package)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             ;; pyqi is unmaintained, so do not require it.
             (add-after 'unpack 'remove-pyqi
               (lambda _
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                 #t)))))))))
973
;; BioPerl built with a minimal set of inputs.  The names of the transitive
;; target inputs are computed on the host side and spliced into the
;; 'wrap-programs phase, which uses them to build PERL5LIB for the scripts.
(define-public bioperl-minimal
  (let* ((direct-inputs `(("perl-module-build" ,perl-module-build)
                          ("perl-data-stag" ,perl-data-stag)
                          ("perl-libwww" ,perl-libwww)
                          ("perl-uri" ,perl-uri)))
         ;; Package names (strings) of everything reachable from the direct
         ;; inputs, without duplicates.
         (transitive-input-names
          (map (lambda (input)
                 (package-name (cadr input)))
               (delete-duplicates
                (append-map (lambda (input)
                              (package-transitive-target-inputs (cadr input)))
                            direct-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The tag is "release-" followed by the version with every
               ;; dot replaced by a dash.
               (commit (string-append
                        "release-"
                        (string-join (string-split version #\.) "-")))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Every ".pl" executable in "bin" must find its Perl modules
               ;; at run time.  PERL5LIB also lists the native inputs, so the
               ;; search path is assembled from the transitive target inputs
               ;; looked up in %build-inputs instead.
               (let* ((out (assoc-ref outputs "out"))
                      (scripts (find-files (string-append out "/bin/")
                                           "\\.pl$"))
                      (input-directories
                       (map (lambda (input-name)
                              (assoc-ref %build-inputs input-name))
                            ',transitive-input-names))
                      (perl5lib
                       (string-join
                        (cons (string-append out "/lib/perl5/site_perl")
                              input-directories)
                        ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               (list "PERL5LIB" ":" 'prefix (list perl5lib))))
                           scripts)
                 #t))))))
      (inputs direct-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
1039
;; Biopython, fetched from PyPI.  The only deviation from the standard Python
;; build is that HOME is set before the test suite runs.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; PyPI is used instead of biopython.org because it makes updating
       ;; easier.
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32
         "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; A home directory must be set for some of the tests.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (home-page "https://biopython.org/")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
1070
;; Python 2 counterpart of python-biopython, obtained by rewriting the
;; package with 'package-with-python2'.
(define-public python2-biopython
  (let ((python-variant python-biopython))
    (package-with-python2 python-variant)))
1073
;; Small FASTA/FASTQ parser fetched from PyPI.  Tests are disabled because
;; the test data is not part of the distribution.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "fastalite" version))
              (sha256
               (base32
                "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    (arguments
     ;; Test data is not distributed.
     `(#:tests? #f))
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (home-page "https://github.com/nhoffman/fastalite")
    (license license:expat)))
1093
;; Python 2 counterpart of python-fastalite, obtained by rewriting the
;; package with 'package-with-python2'.
(define-public python2-fastalite
  (let ((python-variant python-fastalite))
    (package-with-python2 python-variant)))
1096
;; Bio++ core library, built from a pinned git commit.
;;
;; The last release was in 2014 and upstream recommends installing from a
;; clone of the git repository, hence the commit-based version.
;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
(define-public bpp-core
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
        (revision "1"))
    (package
      (name "bpp-core")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         ;; Same as NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (home-page "http://biopp.univ-montp2.fr")
      (license license:cecill-c))))
1126
;; Bio++ phylogenetics library, built from a pinned git commit.
;;
;; The last release was in 2014 and upstream recommends installing from a
;; clone of the git repository, hence the commit-based version.
;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
(define-public bpp-phyl
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
        (revision "1"))
    (package
      (name "bpp-phyl")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         ;; Same as NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; An out-of-source build does not copy the test data into the build
         ;; directory, which makes the tests fail; build in the source tree.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (home-page "http://biopp.univ-montp2.fr")
      (license license:cecill-c))))
1160
;; Bio++ population genetics library, built from a pinned git commit.
;;
;; The last release was in 2014 and upstream recommends installing from a
;; clone of the git repository, hence the commit-based version.
;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
(define-public bpp-popgen
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
        (revision "1"))
    (package
      (name "bpp-popgen")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         ;; Same as NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; There are no tests.
         #:tests? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (home-page "http://biopp.univ-montp2.fr")
      (license license:cecill-c))))
1192
;; Bio++ sequence library, built from a pinned git commit.
;;
;; The last release was in 2014 and upstream recommends installing from a
;; clone of the git repository, hence the commit-based version.
;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
(define-public bpp-seq
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
        (revision "1"))
    (package
      (name "bpp-seq")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         ;; Same as NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; An out-of-source build does not copy the test data into the build
         ;; directory, which makes the tests fail; build in the source tree.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)))
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (home-page "http://biopp.univ-montp2.fr")
      (license license:cecill-c))))
1225
;; Command-line tools built on the Bio++ libraries, from a pinned git commit.
;;
;; The last release was in 2014 and the recommended way to install from source
;; is to clone the git repository, so we do this.
;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
(define-public bppsuite
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Each input needs its own label: a label lookup such as
       ;; (assoc-ref inputs "bpp-phyl") only ever returns the first match, so
       ;; a duplicated label hides the second package.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1263
;; NCBI BLAST+.  The bundled bzip2, zlib and pcre sources are removed in the
;; origin snippet, hard-coded tool paths in the build system are rewritten,
;; and the stock 'configure phase is replaced by a direct call to
;; "./configure.orig".  The result is split into "out", "lib" and "include".
(define-public blast+
  (package
    (name "blast+")
    (version "2.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "1jlq0afxxgczpp35k6mxh8mn4jzq7vqcnaixk166sfj10wq8v9qh"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2, zlib and pcre.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;  21 MB
               "lib"                    ; 226 MB
               "include"))              ;  33 MB
    (arguments
     `(;; There are two(!) tests for this massive library, and both fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the program PROGRAM, or #f
             ;; (after printing a warning) when it cannot be found in PATH.
             (define (absolute-program program)
               (if (string=? program "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which program)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 program)
                         #f))))

             ;; Build-system files that mention tools by hard-coded path.
             (define files-with-hardcoded-paths
               (append '("src/build-system/configure.ac"
                         "src/build-system/configure"
                         "src/build-system/helpers/run_with_lock.c"
                         "scripts/common/impl/if_diff.sh"
                         "scripts/common/impl/run_with_lock.sh"
                         "src/build-system/Makefile.configurables.real"
                         "src/build-system/Makefile.in.top"
                         "src/build-system/Makefile.meta.gmake=no"
                         "src/build-system/Makefile.meta.in"
                         "src/build-system/Makefile.meta_l"
                         "src/build-system/Makefile.meta_p"
                         "src/build-system/Makefile.meta_r"
                         "src/build-system/Makefile.mk.in"
                         "src/build-system/Makefile.requirements"
                         "src/build-system/Makefile.rules_with_autodep.in")
                       (find-files "scripts/common/check" "\\.sh$")))

             ;; Rewrite hardcoded paths to various tools; text is left alone
             ;; when the tool cannot be resolved.
             (substitute* files-with-hardcoded-paths
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole directory program)
                (or (absolute-program program) whole)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (libdir (string-append (assoc-ref outputs "lib") "/lib"))
                    (includedir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++"))
                    (build-root (string-append (getcwd) "/build")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" build-root)
                       (string-append "--prefix=" prefix)
                       (string-append "--libdir=" libdir)
                       (string-append "--includedir=" includedir)
                       (string-append "--with-bz2="
                                      (assoc-ref inputs "bzip2"))
                       (string-append "--with-z="
                                      (assoc-ref inputs "zlib"))
                       (string-append "--with-pcre="
                                      (assoc-ref inputs "pcre"))
                       ;; Each library is built twice by default, once
                       ;; with "-static" in its name, and again
                       ;; without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (native-inputs
     `(("cpio" ,cpio)))
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1414
;; BLESS, a Bloom-filter-based error correction tool for sequencing reads.
;; Several bundled libraries are removed from the sources; the program is
;; linked against the Boost and zlib inputs, and the pigz path is hard-coded
;; to the "pigz" input.  There is no configure script and no "check" target.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled boost, pigz, sparsehash, zlib, and the
                  ;; .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; Point the Makefile at the static zlib archive and link against the
       ;; Boost libraries provided as inputs.
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-build-bundled-pigz
           ;; The bundled pigz directory is deleted in the source snippet, so
           ;; drop the Makefile command that would build it.
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         (add-after 'unpack 'patch-paths-to-executables
           ;; Hard-code absolute file names: kmc is installed by this package
           ;; (see the 'install phase), pigz comes from the "pigz" input.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         (replace 'install
           ;; Install the two executables by hand instead of running
           ;; "make install".
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1499
;; Bowtie 2 read aligner.  The Makefile is patched in the origin snippet so
;; that the build host and build time are constants, and the test suite is
;; run through the upstream Perl script rather than a "check" target.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Use fixed values for BUILD_HOST and BUILD_TIME so that the
           ;; build is deterministic.
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "allall"
               "WITH_TBB=1"
               (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure step; the Makefile is used directly.
         (delete 'configure)
         (replace 'check
           (lambda _
             ;; Run the upstream test script against the freshly built
             ;; binaries in the build directory.
             (let ((test-script "scripts/test/simple_tests.pl"))
               (invoke "perl"
                       test-script
                       "--bowtie2=./bowtie2"
                       "--bowtie2-build=./bowtie2-build"))
             #t)))))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (license license:gpl3+)))
1558
;; Bowtie 1 short read aligner, built from the upstream source zip.  There is
;; no configure script and no "check" target; the Makefile is patched in the
;; origin snippet so that the build host and build time are constants.
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.2.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                                  version "/bowtie-src-x86_64.zip"))
              (sha256
               (base32
                "0vmiqdhc9dzyfy9sh6vgi7k9xy2hiw8g87vbamnc6cgpm179zsa4"))
              (modules '((guix build utils)))
              (snippet
               ;; End with an explicit #t, like the other snippets in this
               ;; file, instead of relying on the unspecified return value of
               ;; 'substitute*'.
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "all"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
1598
;; TopHat spliced read mapper.  The bundled SeqAn and samtools copies are
;; deleted from the sources, and the 'use-system-samtools phase rewires the
;; build and the code to the samtools found in PATH.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32
         "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (for-each delete-file-recursively
                     '("src/SeqAn-1.4.2"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Stop the Makefile from building and referencing the bundled
             ;; samtools program and library.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ assignment)
                assignment)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Replace the name of the bundled samtools executable with the
             ;; file name of the samtools found in PATH.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the samtools headers from the "samtools/" directory,
             ;; for both the quoted and the angle-bracket include forms.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (native-inputs
     ;; TopHat doesn't build with later versions of GCC.
     `(("gcc" ,gcc-5)))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1666
;; BWA sequence aligner.  Upstream has neither a configure script nor a
;; "check" or usable install target, so the 'install phase copies the
;; program, static library, README and manual page by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32
         "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Each entry pairs a built file with its destination
               ;; directory relative to the output.
               (for-each (lambda (file+directory)
                           (install-file (car file+directory)
                                         (string-append
                                          out (cdr file+directory))))
                         '(("bwa" . "/bin")
                           ("libbwa.a" . "/lib")
                           ("README.md" . "/share/doc/bwa")
                           ("bwa.1" . "/share/man/man1"))))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (home-page "http://bio-bwa.sourceforge.net/")
    (license license:gpl3+)))
1715
;; BWA-PSSM, a modification of BWA.  The package inherits from 'bwa', so the
;; fields not overridden here (notably the build arguments and supported
;; systems) come from that definition.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1745
;; bwa-meth: a Python program built with python-build-system from a tagged
;; Git checkout. The source is patched so that it invokes the `bwa'
;; executable by absolute file name.
1746 (define-public bwa-meth
1747 (package
1748 (name "bwa-meth")
1749 (version "0.2.2")
1750 (source (origin
1751 (method git-fetch)
1752 (uri (git-reference
1753 (url "https://github.com/brentp/bwa-meth.git")
;; The checked-out commit is "v" followed by the version.
1754 (commit (string-append "v" version))))
1755 (file-name (git-file-name name version))
1756 (sha256
1757 (base32
1758 "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
1759 (build-system python-build-system)
1760 (arguments
1761 `(#:phases
1762 (modify-phases %standard-phases
;; Rewrite every "bwa mem" / "bwa index" occurrence in bwameth.py so that
;; "bwa" becomes the file name returned by (which "bwa"); the matched
;; sub-command is kept as is.
1763 (add-after 'unpack 'keep-references-to-bwa
1764 (lambda* (#:key inputs #:allow-other-keys)
1765 (substitute* "bwameth.py"
1766 (("bwa (mem|index)" _ command)
1767 (string-append (which "bwa") " " command))
1768 ;; There's an ill-advised check for "samtools" on PATH.
;; Lines that start with "checkX" are replaced by the empty string.
1769 (("^checkX.*") ""))
1770 #t)))))
1771 (inputs
1772 `(("bwa" ,bwa)))
1773 (native-inputs
1774 `(("python-toolshed" ,python-toolshed)))
1775 (home-page "https://github.com/brentp/bwa-meth")
1776 (synopsis "Fast and accurate alignment of BS-Seq reads")
1777 (description
1778 "BWA-Meth works for single-end reads and for paired-end reads from the
1779 directional protocol (most common). It uses the method employed by
1780 methylcoder and Bismark of in silico conversion of all C's to T's in both
1781 reference and reads. It recovers the original read (needed to tabulate
1782 methylation) by attaching it as a comment which BWA appends as a tag to the
1783 read. It performs favorably to existing aligners gauged by number of on and
1784 off-target reads for a capture method that targets CpG-rich region.")
1785 (license license:expat)))
1786
;; bx-python: fetched from PyPI and built with python-build-system. The test
;; suite is switched off (see the comment next to `arguments').
1787 (define-public python-bx-python
1788 (package
1789 (name "python-bx-python")
1790 (version "0.8.2")
1791 (source (origin
1792 (method url-fetch)
1793 (uri (pypi-uri "bx-python" version))
1794 (sha256
1795 (base32
1796 "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
1797 (build-system python-build-system)
1798 ;; Tests fail because test data are not included
1799 (arguments '(#:tests? #f))
;; numpy and six are propagated; zlib is a plain input; lzo, nose and
;; cython are native inputs.
1800 (propagated-inputs
1801 `(("python-numpy" ,python-numpy)
1802 ("python-six" ,python-six)))
1803 (inputs
1804 `(("zlib" ,zlib)))
1805 (native-inputs
1806 `(("python-lzo" ,python-lzo)
1807 ("python-nose" ,python-nose)
1808 ("python-cython" ,python-cython)))
1809 (home-page "https://github.com/bxlab/bx-python")
1810 (synopsis "Tools for manipulating biological data")
1811 (description
1812 "bx-python provides tools for manipulating biological data, particularly
1813 multiple sequence alignments.")
1814 (license license:expat)))
1815
;; Python 2 variant, derived from python-bx-python with package-with-python2.
1816 (define-public python2-bx-python
1817 (package-with-python2 python-bx-python))
1818
;; pysam: built with python-build-system from a tagged Git checkout. The
;; bundled htslib directory is deleted from the source and the build is
;; pointed at the htslib input through environment variables. The `check'
;; phase is replaced by a custom nosetests run.
1819 (define-public python-pysam
1820 (package
1821 (name "python-pysam")
1822 (version "0.15.1")
1823 (source (origin
1824 (method git-fetch)
1825 ;; Test data is missing on PyPi.
1826 (uri (git-reference
1827 (url "https://github.com/pysam-developers/pysam.git")
1828 (commit (string-append "v" version))))
1829 (file-name (git-file-name name version))
1830 (sha256
1831 (base32
1832 "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
1833 (modules '((guix build utils)))
1834 (snippet '(begin
1835 ;; Drop bundled htslib. TODO: Also remove samtools
1836 ;; and bcftools.
1837 (delete-file-recursively "htslib")
1838 #t))))
1839 (build-system python-build-system)
1840 (arguments
;; (ice-9 ftw) and (srfi srfi-26) are imported for `scandir' and `cut',
;; both used in the replacement `check' phase below.
1841 `(#:modules ((ice-9 ftw)
1842 (srfi srfi-26)
1843 (guix build python-build-system)
1844 (guix build utils))
1845 #:phases
1846 (modify-phases %standard-phases
;; Export the HTSLIB_* variables (library and include directories of the
;; "htslib" input) plus LDFLAGS and CFLAGS before the build phase runs.
1847 (add-before 'build 'set-flags
1848 (lambda* (#:key inputs #:allow-other-keys)
1849 (setenv "HTSLIB_MODE" "external")
1850 (setenv "HTSLIB_LIBRARY_DIR"
1851 (string-append (assoc-ref inputs "htslib") "/lib"))
1852 (setenv "HTSLIB_INCLUDE_DIR"
1853 (string-append (assoc-ref inputs "htslib") "/include"))
1854 (setenv "LDFLAGS" "-lncurses")
1855 (setenv "CFLAGS" "-D_CURSES_LIB=1")
1856 #t))
1857 (replace 'check
1858 (lambda* (#:key inputs outputs #:allow-other-keys)
1859 ;; This file contains tests that require a connection to the
1860 ;; internet.
1861 (delete-file "tests/tabix_test.py")
1862 ;; FIXME: This test fails
1863 (delete-file "tests/AlignmentFile_test.py")
1864 ;; Add first subdirectory of "build" directory to PYTHONPATH.
;; `scandir' is given a predicate that rejects names starting with ".";
;; the first remaining entry is appended to the existing PYTHONPATH.
1865 (setenv "PYTHONPATH"
1866 (string-append
1867 (getenv "PYTHONPATH")
1868 ":" (getcwd) "/build/"
1869 (car (scandir "build"
1870 (negate (cut string-prefix? "." <>))))))
1871 ;; Step out of source dir so python does not import from CWD.
1872 (with-directory-excursion "tests"
1873 (setenv "HOME" "/tmp")
;; Run make in the two test data directories before running the tests.
1874 (invoke "make" "-C" "pysam_data")
1875 (invoke "make" "-C" "cbcf_data")
1876 ;; Running nosetests without explicitly asking for a single
1877 ;; process leads to a crash. Running with multiple processes
1878 ;; fails because the tests are not designed to run in parallel.
1879
1880 ;; FIXME: tests keep timing out on some systems.
1881 (invoke "nosetests" "-v" "--processes" "1")))))))
1882 (propagated-inputs
1883 `(("htslib" ,htslib))) ; Included from installed header files.
1884 (inputs
1885 `(("ncurses" ,ncurses)
1886 ("curl" ,curl)
1887 ("zlib" ,zlib)))
1888 (native-inputs
1889 `(("python-cython" ,python-cython)
1890 ;; Dependencies below are for tests only.
1891 ("samtools" ,samtools)
1892 ("bcftools" ,bcftools)
1893 ("python-nose" ,python-nose)))
1894 (home-page "https://github.com/pysam-developers/pysam")
1895 (synopsis "Python bindings to the SAMtools C API")
1896 (description
1897 "Pysam is a Python module for reading and manipulating files in the
1898 SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
1899 also includes an interface for tabix.")
1900 (license license:expat)))
1901
;; Python 2 variant, derived from python-pysam with package-with-python2.
1902 (define-public python2-pysam
1903 (package-with-python2 python-pysam))
1904
;; twobitreader: built with python-build-system from a Git checkout whose
;; commit is named by the bare version string. Tests are switched off.
1905 (define-public python-twobitreader
1906 (package
1907 (name "python-twobitreader")
1908 (version "3.1.6")
1909 (source (origin
1910 (method git-fetch)
1911 (uri (git-reference
1912 (url "https://github.com/benjschiller/twobitreader")
1913 (commit version)))
1914 (file-name (git-file-name name version))
1915 (sha256
1916 (base32
1917 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
1918 (build-system python-build-system)
1919 ;; Tests are not included
1920 (arguments '(#:tests? #f))
1921 (native-inputs
1922 `(("python-sphinx" ,python-sphinx)))
1923 (home-page "https://github.com/benjschiller/twobitreader")
1924 (synopsis "Python library for reading .2bit files")
1925 (description
1926 "twobitreader is a Python library for reading .2bit files as used by the
1927 UCSC genome browser.")
1928 (license license:artistic2.0)))
1929
;; Python 2 variant, derived from python-twobitreader with
;; package-with-python2.
1930 (define-public python2-twobitreader
1931 (package-with-python2 python-twobitreader))
1932
;; plastid: fetched from PyPI and built with python-build-system, with the
;; test suite switched off. Its Python dependencies are all propagated.
1933 (define-public python-plastid
1934 (package
1935 (name "python-plastid")
1936 (version "0.4.8")
1937 (source (origin
1938 (method url-fetch)
1939 (uri (pypi-uri "plastid" version))
1940 (sha256
1941 (base32
1942 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
1943 (build-system python-build-system)
1944 (arguments
1945 ;; Some test files are not included.
1946 `(#:tests? #f))
1947 (propagated-inputs
1948 `(("python-numpy" ,python-numpy)
1949 ("python-scipy" ,python-scipy)
1950 ("python-pandas" ,python-pandas)
1951 ("python-pysam" ,python-pysam)
1952 ("python-matplotlib" ,python-matplotlib)
1953 ("python-biopython" ,python-biopython)
1954 ("python-twobitreader" ,python-twobitreader)
1955 ("python-termcolor" ,python-termcolor)))
1956 (native-inputs
1957 `(("python-cython" ,python-cython)
1958 ("python-nose" ,python-nose)))
1959 (home-page "https://github.com/joshuagryphon/plastid")
1960 (synopsis "Python library for genomic analysis")
1961 (description
1962 "plastid is a Python library for genomic analysis – in particular,
1963 high-throughput sequencing data – with an emphasis on simplicity.")
1964 (license license:bsd-3)))
1965
;; Python 2 variant, derived from python-plastid with package-with-python2.
1966 (define-public python2-plastid
1967 (package-with-python2 python-plastid))
1968
;; TEToolkit (TEtranscripts and TEcount): built with python-build-system
;; using Python 2. The scripts are patched to call external tools by absolute
;; file name and the installed executables are wrapped with R_LIBS_SITE.
1969 (define-public tetoolkit
1970 (package
1971 (name "tetoolkit")
1972 (version "2.0.3")
1973 (source (origin
1974 (method git-fetch)
1975 (uri (git-reference
1976 (url "https://github.com/mhammell-laboratory/tetoolkit.git")
1977 (commit version)))
1978 (file-name (git-file-name name version))
1979 (sha256
1980 (base32
1981 "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
1982 (build-system python-build-system)
1983 (arguments
1984 `(#:python ,python-2 ; not guaranteed to work with Python 3
1985 #:phases
1986 (modify-phases %standard-phases
;; Make every file under the source directory writable.
1987 (add-after 'unpack 'make-writable
1988 (lambda _
1989 (for-each make-file-writable (find-files "."))
1990 #t))
;; Replace references to sort, rm, Rscript and the "BamToBED" string with
;; the file names that `which' returns in the build environment.
1991 (add-after 'unpack 'patch-invocations
1992 (lambda* (#:key inputs #:allow-other-keys)
1993 (substitute* '("bin/TEtranscripts"
1994 "bin/TEcount")
1995 (("'sort ")
1996 (string-append "'" (which "sort") " "))
1997 (("'rm -f ")
1998 (string-append "'" (which "rm") " -f "))
1999 (("'Rscript'") (string-append "'" (which "Rscript") "'")))
2000 (substitute* "TEToolkit/IO/ReadInputs.py"
2001 (("BamToBED") (which "bamToBed")))
2002 (substitute* "TEToolkit/Normalization.py"
2003 (("\"Rscript\"")
2004 (string-append "\"" (which "Rscript") "\"")))
2005 #t))
2006 (add-after 'install 'wrap-program
2007 (lambda* (#:key outputs #:allow-other-keys)
2008 ;; Make sure the executables find R packages.
;; Both installed scripts get R_LIBS_SITE set to the value it has at build
;; time.
2009 (let ((out (assoc-ref outputs "out")))
2010 (for-each
2011 (lambda (script)
2012 (wrap-program (string-append out "/bin/" script)
2013 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
2014 '("TEtranscripts"
2015 "TEcount")))
2016 #t)))))
;; The labels read "python-..." but the packages are the Python 2 variants.
2017 (inputs
2018 `(("coreutils" ,coreutils)
2019 ("bedtools" ,bedtools)
2020 ("python-argparse" ,python2-argparse)
2021 ("python-pysam" ,python2-pysam)
2022 ("r-minimal" ,r-minimal)
2023 ("r-deseq2" ,r-deseq2)))
2024 (home-page "https://github.com/mhammell-laboratory/tetoolkit")
2025 (synopsis "Transposable elements in differential enrichment analysis")
2026 (description
2027 "This is a package for including transposable elements in differential
2028 enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
2029 RNA-seq (and similar data) and annotate reads to both genes and transposable
2030 elements. TEtranscripts then performs differential analysis using DESeq2.
2031 Note that TEtranscripts and TEcount rely on specially curated GTF files, which
2032 are not included due to their size.")
2033 (license license:gpl3+)))
2034
;; CD-HIT: built from a release tarball with gnu-build-system. There is no
;; configure step; installation is driven by the PREFIX make flag, and
;; __DATE__/__TIME__ references are patched out of the sources.
2035 (define-public cd-hit
2036 (package
2037 (name "cd-hit")
2038 (version "4.6.8")
2039 (source (origin
2040 (method url-fetch)
;; The tarball name embeds a fixed "2017-0621" component in addition to the
;; version.
2041 (uri (string-append "https://github.com/weizhongli/cdhit"
2042 "/releases/download/V" version
2043 "/cd-hit-v" version
2044 "-2017-0621-source.tar.gz"))
2045 (sha256
2046 (base32
2047 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
2048 (build-system gnu-build-system)
2049 (arguments
2050 `(#:tests? #f ; there are no tests
2051 #:make-flags
2052 ;; Executables are copied directly to the PREFIX.
2053 (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
2054 ;; Support longer sequences (e.g. Pacbio sequences)
2055 "MAX_SEQ=60000000")
2056 #:phases
2057 (modify-phases %standard-phases
2058 ;; No "configure" script
2059 (delete 'configure)
2060 ;; Remove sources of non-determinism
2061 (add-after 'unpack 'be-timeless
2062 (lambda _
;; Drop the "(built on __DATE__)" text from cdhit-utility.c++; in
;; cdhit-common.c++ replace __DATE__ with the string "0" and remove the
;; __TIME__ fragment.
2063 (substitute* "cdhit-utility.c++"
2064 ((" \\(built on \" __DATE__ \"\\)") ""))
2065 (substitute* "cdhit-common.c++"
2066 (("__DATE__") "\"0\"")
2067 (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
2068 #t))
2069 ;; The "install" target does not create the target directory.
2070 (add-before 'install 'create-target-dir
2071 (lambda* (#:key outputs #:allow-other-keys)
2072 (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
2073 #t)))))
2074 (inputs
2075 `(("perl" ,perl)))
2076 (home-page "http://weizhongli-lab.org/cd-hit/")
2077 (synopsis "Cluster and compare protein or nucleotide sequences")
2078 (description
2079 "CD-HIT is a program for clustering and comparing protein or nucleotide
2080 sequences. CD-HIT is designed to be fast and handle extremely large
2081 databases.")
2082 ;; The manual says: "It can be copied under the GNU General Public License
2083 ;; version 2 (GPLv2)."
2084 (license license:gpl2)))
2085
;; CLIPper: built with python-build-system using Python 2. A source snippet
;; strips the setup_requires line from setup.py, and one phase patches
;; readsToWiggle.pyx.
2086 (define-public clipper
2087 (package
2088 (name "clipper")
2089 (version "1.2.1")
2090 (source (origin
2091 (method git-fetch)
2092 (uri (git-reference
2093 (url "https://github.com/YeoLab/clipper.git")
2094 (commit version)))
2095 (file-name (git-file-name name version))
2096 (sha256
2097 (base32
2098 "0fja1rj84wp9vpj8rxpj3n8zqzcqq454m904yp9as1w4phccirjb"))
2099 (modules '((guix build utils)))
2100 (snippet
2101 '(begin
2102 ;; remove unnecessary setup dependency
2103 (substitute* "setup.py"
2104 (("setup_requires = .*") ""))
2105 #t))))
2106 (build-system python-build-system)
2107 (arguments
2108 `(#:python ,python-2 ; only Python 2 is supported
2109 #:phases
2110 (modify-phases %standard-phases
2111 ;; This is fixed in upstream commit
2112 ;; f6c2990198f906bf97730d95695b4bd5a6d01ddb.
;; Every line of readsToWiggle.pyx that starts with "sc" is replaced by
;; the empty string.
2113 (add-after 'unpack 'fix-typo
2114 (lambda _
2115 (substitute* "clipper/src/readsToWiggle.pyx"
2116 (("^sc.*") ""))
2117 #t)))))
;; The labels read "python-..." but the packages are the Python 2 variants.
2118 (inputs
2119 `(("htseq" ,python2-htseq)
2120 ("python-pybedtools" ,python2-pybedtools)
2121 ("python-cython" ,python2-cython)
2122 ("python-scikit-learn" ,python2-scikit-learn)
2123 ("python-matplotlib" ,python2-matplotlib)
2124 ("python-pandas" ,python2-pandas)
2125 ("python-pysam" ,python2-pysam)
2126 ("python-numpy" ,python2-numpy)
2127 ("python-scipy" ,python2-scipy)))
2128 (native-inputs
2129 `(("python-mock" ,python2-mock) ; for tests
2130 ("python-nose" ,python2-nose) ; for tests
2131 ("python-pytz" ,python2-pytz))) ; for tests
2132 (home-page "https://github.com/YeoLab/clipper")
2133 (synopsis "CLIP peak enrichment recognition")
2134 (description
2135 "CLIPper is a tool to define peaks in CLIP-seq datasets.")
2136 (license license:gpl2)))
2137
;; CodingQuarry: built with gnu-build-system from a SourceForge tarball,
;; without a configure phase and with a hand-written install phase. The
;; package declares a QUARRY_PATH search path over "QuarryFiles".
2138 (define-public codingquarry
2139 (package
2140 (name "codingquarry")
2141 (version "2.0")
2142 (source (origin
2143 (method url-fetch)
2144 (uri (string-append
2145 "mirror://sourceforge/codingquarry/CodingQuarry_v"
2146 version ".tar.gz"))
2147 (sha256
2148 (base32
2149 "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
2150 (build-system gnu-build-system)
2151 (arguments
2152 '(#:tests? #f ; no "check" target
2153 #:phases
2154 (modify-phases %standard-phases
2155 (delete 'configure)
;; Install the PDF under share/doc/codingquarry, copy the QuarryFiles
;; directory to the top of the output, and put the executable and the
;; conversion script in bin.
2156 (replace 'install
2157 (lambda* (#:key outputs #:allow-other-keys)
2158 (let* ((out (assoc-ref outputs "out"))
2159 (bin (string-append out "/bin"))
2160 (doc (string-append out "/share/doc/codingquarry")))
2161 (install-file "INSTRUCTIONS.pdf" doc)
2162 (copy-recursively "QuarryFiles"
2163 (string-append out "/QuarryFiles"))
2164 (install-file "CodingQuarry" bin)
2165 (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin))
2166 #t)))))
2167 (inputs `(("openmpi" ,openmpi)))
;; The "QuarryFiles" entry matches the directory copied by the install
;; phase above.
2168 (native-search-paths
2169 (list (search-path-specification
2170 (variable "QUARRY_PATH")
2171 (files '("QuarryFiles")))))
2172 (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
2173 (synopsis "Fungal gene predictor")
2174 (description "CodingQuarry is a highly accurate, self-training GHMM fungal
2175 gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
2176 (home-page "https://sourceforge.net/projects/codingquarry/")
2177 (license license:gpl3+)))
2178
;; COUGER: distributed as a zip archive. Nothing is configured or built; the
;; install phase copies the "src" directory and the "couger" script into the
;; output, and the script is then wrapped with the build-time PYTHONPATH.
2179 (define-public couger
2180 (package
2181 (name "couger")
2182 (version "1.8.2")
2183 (source (origin
2184 (method url-fetch)
2185 (uri (string-append
2186 "http://couger.oit.duke.edu/static/assets/COUGER"
2187 version ".zip"))
2188 (sha256
2189 (base32
2190 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
2191 (build-system gnu-build-system)
2192 (arguments
2193 `(#:tests? #f
2194 #:phases
2195 (modify-phases %standard-phases
2196 (delete 'configure)
2197 (delete 'build)
2198 (replace
2199 'install
2200 (lambda* (#:key outputs #:allow-other-keys)
2201 (let* ((out (assoc-ref outputs "out"))
2202 (bin (string-append out "/bin")))
2203 (copy-recursively "src" (string-append out "/src"))
2204 (mkdir bin)
2205 ;; Add "src" directory to module lookup path.
;; The text inserted before "from argparse" appends the output directory
;; itself (the parent of "src") to sys.path.
2206 (substitute* "couger"
2207 (("from argparse")
2208 (string-append "import sys\nsys.path.append(\""
2209 out "\")\nfrom argparse")))
2210 (install-file "couger" bin))
2211 #t))
2212 (add-after
2213 'install 'wrap-program
2214 (lambda* (#:key inputs outputs #:allow-other-keys)
2215 ;; Make sure 'couger' runs with the correct PYTHONPATH.
2216 (let* ((out (assoc-ref outputs "out"))
2217 (path (getenv "PYTHONPATH")))
2218 (wrap-program (string-append out "/bin/couger")
2219 `("PYTHONPATH" ":" prefix (,path))))
2220 #t)))))
2221 (inputs
2222 `(("python" ,python-2)
2223 ("python2-pillow" ,python2-pillow)
2224 ("python2-numpy" ,python2-numpy)
2225 ("python2-scipy" ,python2-scipy)
2226 ("python2-matplotlib" ,python2-matplotlib)))
2227 (propagated-inputs
2228 `(("r-minimal" ,r-minimal)
2229 ("libsvm" ,libsvm)
2230 ("randomjungle" ,randomjungle)))
;; unzip is listed as a native input; the source is a .zip archive.
2231 (native-inputs
2232 `(("unzip" ,unzip)))
2233 (home-page "http://couger.oit.duke.edu")
2234 (synopsis "Identify co-factors in sets of genomic regions")
2235 (description
2236 "COUGER can be applied to any two sets of genomic regions bound by
2237 paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
2238 putative co-factors that provide specificity to each TF. The framework
2239 determines the genomic targets uniquely-bound by each TF, and identifies a
2240 small set of co-factors that best explain the in vivo binding differences
2241 between the two TFs.
2242
2243 COUGER uses classification algorithms (support vector machines and random
2244 forests) with features that reflect the DNA binding specificities of putative
2245 co-factors. The features are generated either from high-throughput TF-DNA
2246 binding data (from protein binding microarray experiments), or from large
2247 collections of DNA motifs.")
2248 (license license:gpl3+)))
2249
;; Clustal-Omega: built from a release tarball with gnu-build-system and no
;; custom arguments; argtable is its only declared input.
2250 (define-public clustal-omega
2251 (package
2252 (name "clustal-omega")
2253 (version "1.2.4")
2254 (source (origin
2255 (method url-fetch)
2256 (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
2257 version ".tar.gz"))
2258 (sha256
2259 (base32
2260 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
2261 (build-system gnu-build-system)
2262 (inputs
2263 `(("argtable" ,argtable)))
2264 (home-page "http://www.clustal.org/omega/")
2265 (synopsis "Multiple sequence aligner for protein and DNA/RNA")
2266 (description
2267 "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
2268 program for protein and DNA/RNA. It produces high quality MSAs and is capable
2269 of handling data-sets of hundreds of thousands of sequences in reasonable
2270 time.")
2271 (license license:gpl2+)))
2272
;; CrossMap: fetched from PyPI (project name "CrossMap") and built with
;; python-build-system using the default phases.
2273 (define-public crossmap
2274 (package
2275 (name "crossmap")
2276 (version "0.3.8")
2277 (source (origin
2278 (method url-fetch)
2279 (uri (pypi-uri "CrossMap" version))
2280 (sha256
2281 (base32
2282 "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
2283 (build-system python-build-system)
2284 (inputs
2285 `(("python-bx-python" ,python-bx-python)
2286 ("python-numpy" ,python-numpy)
2287 ("python-pybigwig" ,python-pybigwig)
2288 ("python-pysam" ,python-pysam)
2289 ("zlib" ,zlib)))
2290 (native-inputs
2291 `(("python-cython" ,python-cython)
2292 ("python-nose" ,python-nose)))
2293 (home-page "http://crossmap.sourceforge.net/")
2294 (synopsis "Convert genome coordinates between assemblies")
2295 (description
2296 "CrossMap is a program for conversion of genome coordinates or annotation
2297 files between different genome assemblies. It supports most commonly used
2298 file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
2299 (license license:gpl2+)))
2300
;; dnaio: fetched from PyPI and built with python-build-system using the
;; default phases. All declared dependencies are native inputs.
2301 (define-public python-dnaio
2302 (package
2303 (name "python-dnaio")
2304 (version "0.3")
2305 (source
2306 (origin
2307 (method url-fetch)
2308 (uri (pypi-uri "dnaio" version))
2309 (sha256
2310 (base32
2311 "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
2312 (build-system python-build-system)
2313 (native-inputs
2314 `(("python-cython" ,python-cython)
2315 ("python-pytest" ,python-pytest)
2316 ("python-xopen" ,python-xopen)))
2317 (home-page "https://github.com/marcelm/dnaio/")
2318 (synopsis "Read FASTA and FASTQ files efficiently")
2319 (description
2320 "dnaio is a Python library for fast parsing of FASTQ and also FASTA
2321 files. The code was previously part of the cutadapt tool.")
2322 (license license:expat)))
2323
;; Cutadapt: fetched from PyPI and built with python-build-system using the
;; default phases. It takes python-dnaio and python-xopen as inputs.
2324 (define-public cutadapt
2325 (package
2326 (name "cutadapt")
2327 (version "2.1")
2328 (source (origin
2329 (method url-fetch)
2330 (uri (pypi-uri "cutadapt" version))
2331 (sha256
2332 (base32
2333 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
2334 (build-system python-build-system)
2335 (inputs
2336 `(("python-dnaio" ,python-dnaio)
2337 ("python-xopen" ,python-xopen)))
2338 (native-inputs
2339 `(("python-cython" ,python-cython)
2340 ("python-pytest" ,python-pytest)
2341 ("python-setuptools-scm" ,python-setuptools-scm)))
2342 (home-page "https://cutadapt.readthedocs.io/en/stable/")
2343 (synopsis "Remove adapter sequences from nucleotide sequencing reads")
2344 (description
2345 "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
2346 other types of unwanted sequence from high-throughput sequencing reads.")
2347 (license license:expat)))
2348
;; libBigWig: built with gnu-build-system from a Git checkout. There is no
;; configure phase; the compiler and the installation prefix are passed as
;; make flags.
2349 (define-public libbigwig
2350 (package
2351 (name "libbigwig")
2352 (version "0.4.4")
2353 (source (origin
2354 (method git-fetch)
2355 (uri (git-reference
2356 (url "https://github.com/dpryan79/libBigWig.git")
2357 (commit version)))
2358 (file-name (git-file-name name version))
2359 (sha256
2360 (base32
2361 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
2362 (build-system gnu-build-system)
2363 (arguments
;; The test target is named "test", but tests are switched off on the
;; next line.
2364 `(#:test-target "test"
2365 #:tests? #f ; tests require access to the web
2366 #:make-flags
2367 (list "CC=gcc"
2368 (string-append "prefix=" (assoc-ref %outputs "out")))
2369 #:phases
2370 (modify-phases %standard-phases
2371 (delete 'configure))))
2372 (inputs
2373 `(("zlib" ,zlib)
2374 ("curl" ,curl)))
2375 (native-inputs
2376 `(("doxygen" ,doxygen)
2377 ;; Needed for tests
2378 ("python" ,python-2)))
2379 (home-page "https://github.com/dpryan79/libBigWig")
2380 (synopsis "C library for handling bigWig files")
2381 (description
2382 "This package provides a C library for parsing local and remote BigWig
2383 files.")
2384 (license license:expat)))
2385
;; pyBigWig: fetched from PyPI and built with python-build-system. The
;; bundled "libBigWig" directory is deleted from the source, and setup.py is
;; patched to list "BigWig" among the libraries.
2386 (define-public python-pybigwig
2387 (package
2388 (name "python-pybigwig")
2389 (version "0.3.12")
2390 (source (origin
2391 (method url-fetch)
2392 (uri (pypi-uri "pyBigWig" version))
2393 (sha256
2394 (base32
2395 "00w4kfnm2c5l7wdwr2nj1z5djv8kzgf7h1zhsgv6njff1rwr26g0"))
2396 (modules '((guix build utils)))
2397 (snippet
2398 '(begin
2399 ;; Delete bundled libBigWig sources
2400 (delete-file-recursively "libBigWig")
2401 #t))))
2402 (build-system python-build-system)
2403 (arguments
2404 `(#:phases
2405 (modify-phases %standard-phases
;; Prepend "BigWig" to the list that follows "libs=[" in setup.py.
2406 (add-after 'unpack 'link-with-libBigWig
2407 (lambda* (#:key inputs #:allow-other-keys)
2408 (substitute* "setup.py"
2409 (("libs=\\[") "libs=[\"BigWig\", "))
2410 #t)))))
2411 (propagated-inputs
2412 `(("python-numpy" ,python-numpy)))
2413 (inputs
2414 `(("libbigwig" ,libbigwig)
2415 ("zlib" ,zlib)
2416 ("curl" ,curl)))
2417 (home-page "https://github.com/dpryan79/pyBigWig")
2418 (synopsis "Access bigWig files in Python using libBigWig")
2419 (description
2420 "This package provides Python bindings to the libBigWig library for
2421 accessing bigWig files.")
2422 (license license:expat)))
2423
;; Python 2 variant, derived from python-pybigwig with package-with-python2.
2424 (define-public python2-pybigwig
2425 (package-with-python2 python-pybigwig))
2426
;; DendroPy: built with python-build-system from a tagged Git checkout
;; ("v" followed by the version), using the default phases.
2427 (define-public python-dendropy
2428 (package
2429 (name "python-dendropy")
2430 (version "4.4.0")
2431 (source
2432 (origin
2433 (method git-fetch)
2434 ;; Source from GitHub so that tests are included.
2435 (uri (git-reference
2436 (url "https://github.com/jeetsukumaran/DendroPy.git")
2437 (commit (string-append "v" version))))
2438 (file-name (git-file-name name version))
2439 (sha256
2440 (base32
2441 "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
2442 (build-system python-build-system)
2443 (home-page "https://dendropy.org/")
2444 (synopsis "Library for phylogenetics and phylogenetic computing")
2445 (description
2446 "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
2447 writing, simulation, processing and manipulation of phylogenetic
2448 trees (phylogenies) and characters.")
2449 (license license:bsd-3)))
2450
;; Python 2 variant of python-dendropy. It inherits from the
;; package-with-python2 result and adds one phase that renames a failing
;; test; the arguments of the base package are spliced in after #:phases.
2451 (define-public python2-dendropy
2452 (let ((base (package-with-python2 python-dendropy)))
2453 (package
2454 (inherit base)
2455 (arguments
2456 `(#:phases
2457 (modify-phases %standard-phases
2458 (add-after 'unpack 'remove-failing-test
2459 (lambda _
2460 ;; This test fails when the full test suite is run, as documented
2461 ;; at https://github.com/jeetsukumaran/DendroPy/issues/74
;; The test name gets a "do_not_" prefix so that it no longer starts
;; with "test_".
2462 (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
2463 (("test_collection_comments_and_annotations")
2464 "do_not_test_collection_comments_and_annotations"))
2465 #t)))
2466 ,@(package-arguments base))))))
2467
;; py2bit: fetched from PyPI and built with python-build-system using the
;; default phases; no inputs are declared.
2468 (define-public python-py2bit
2469 (package
2470 (name "python-py2bit")
2471 (version "0.3.0")
2472 (source
2473 (origin
2474 (method url-fetch)
2475 (uri (pypi-uri "py2bit" version))
2476 (sha256
2477 (base32
2478 "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
2479 (build-system python-build-system)
2480 (home-page "https://github.com/dpryan79/py2bit")
2481 (synopsis "Access 2bit files using lib2bit")
2482 (description
2483 "This package provides Python bindings for lib2bit to access 2bit files
2484 with Python.")
2485 (license license:expat)))
2486
;; deepTools: built with python-build-system from a Git checkout whose commit
;; is named by the bare version string. The only phase change is the removal
;; of `reset-gzip-timestamps'.
2487 (define-public deeptools
2488 (package
2489 (name "deeptools")
2490 (version "3.1.3")
2491 (source (origin
2492 (method git-fetch)
2493 (uri (git-reference
2494 (url "https://github.com/deeptools/deepTools.git")
2495 (commit version)))
2496 (file-name (git-file-name name version))
2497 (sha256
2498 (base32
2499 "1vggnf52g6q2vifdl4cyi7s2fnfqq0ky2zrkj5zv2qfzsc3p3siw"))))
2500 (build-system python-build-system)
2501 (arguments
2502 `(#:phases
2503 (modify-phases %standard-phases
2504 ;; This phase fails, but it's not needed.
2505 (delete 'reset-gzip-timestamps))))
2506 (inputs
2507 `(("python-plotly" ,python-plotly)
2508 ("python-scipy" ,python-scipy)
2509 ("python-numpy" ,python-numpy)
2510 ("python-numpydoc" ,python-numpydoc)
2511 ("python-matplotlib" ,python-matplotlib)
2512 ("python-pysam" ,python-pysam)
2513 ("python-py2bit" ,python-py2bit)
2514 ("python-pybigwig" ,python-pybigwig)))
2515 (native-inputs
2516 `(("python-mock" ,python-mock) ;for tests
2517 ("python-nose" ,python-nose) ;for tests
2518 ("python-pytz" ,python-pytz))) ;for tests
2519 (home-page "https://github.com/deeptools/deepTools")
2520 (synopsis "Tools for normalizing and visualizing deep-sequencing data")
2521 (description
2522 "DeepTools addresses the challenge of handling the large amounts of data
2523 that are now routinely generated from DNA sequencing centers. To do so,
2524 deepTools contains useful modules to process the mapped reads data to create
2525 coverage files in standard bedGraph and bigWig file formats. By doing so,
2526 deepTools allows the creation of normalized coverage files or the comparison
2527 between two files (for example, treatment and control). Finally, using such
2528 normalized and standardized files, multiple visualizations can be created to
2529 identify enrichments with functional annotations of the genome.")
2530 (license license:gpl3+)))
2531
;; Delly: built with gnu-build-system from a tagged Git checkout. The bundled
;; "src/htslib" directory is deleted from the source; htslib is listed as an
;; input instead. An extra phase installs the exclude templates.
2532 (define-public delly
2533 (package
2534 (name "delly")
2535 (version "0.7.9")
2536 (source (origin
2537 (method git-fetch)
2538 (uri (git-reference
2539 (url "https://github.com/dellytools/delly.git")
2540 (commit (string-append "v" version))))
2541 (file-name (git-file-name name version))
2542 (sha256
2543 (base32 "034jqsxswy9gqdh2zkgc1js99qkv75ks4xvzgmh0284sraagv61z"))
2544 (modules '((guix build utils)))
2545 (snippet
2546 '(begin
2547 (delete-file-recursively "src/htslib")
2548 #t))))
2549 (build-system gnu-build-system)
2550 (arguments
2551 `(#:tests? #f ; There are no tests to run.
2552 #:make-flags
2553 (list "PARALLEL=1" ; Allow parallel execution at run-time.
2554 (string-append "prefix=" (assoc-ref %outputs "out")))
2555 #:phases
2556 (modify-phases %standard-phases
2557 (delete 'configure) ; There is no configure phase.
;; Copy the "excludeTemplates" directory to share/delly/templates in the
;; output.
2558 (add-after 'install 'install-templates
2559 (lambda* (#:key outputs #:allow-other-keys)
2560 (let ((templates (string-append (assoc-ref outputs "out")
2561 "/share/delly/templates")))
2562 (mkdir-p templates)
2563 (copy-recursively "excludeTemplates" templates)
2564 #t))))))
2565 (inputs
2566 `(("boost" ,boost)
2567 ("htslib" ,htslib)
2568 ("zlib" ,zlib)
2569 ("bzip2" ,bzip2)))
2570 (home-page "https://github.com/dellytools/delly")
2571 (synopsis "Integrated structural variant prediction method")
2572 (description "Delly is an integrated structural variant prediction method
2573 that can discover and genotype deletions, tandem duplications, inversions and
2574 translocations at single-nucleotide resolution in short-read massively parallel
2575 sequencing data. It uses paired-ends and split-reads to sensitively and
2576 accurately delineate genomic rearrangements throughout the genome.")
2577 (license license:gpl3+)))
2578
;; DIAMOND: built with cmake-build-system from a tagged Git checkout. The
;; "-march=native" flag is stripped from CMakeLists.txt before building.
2579 (define-public diamond
2580 (package
2581 (name "diamond")
2582 (version "0.9.30")
2583 (source (origin
2584 (method git-fetch)
2585 (uri (git-reference
2586 (url "https://github.com/bbuchfink/diamond.git")
2587 (commit (string-append "v" version))))
2588 (file-name (git-file-name name version))
2589 (sha256
2590 (base32
2591 "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
2592 (build-system cmake-build-system)
2593 (arguments
2594 '(#:tests? #f ; no "check" target
2595 #:phases
2596 (modify-phases %standard-phases
;; Replace every "-march=native" in CMakeLists.txt with the empty string.
2597 (add-after 'unpack 'remove-native-compilation
2598 (lambda _
2599 (substitute* "CMakeLists.txt" (("-march=native") ""))
2600 #t)))))
2601 (inputs
2602 `(("zlib" ,zlib)))
2603 (home-page "https://github.com/bbuchfink/diamond")
2604 (synopsis "Accelerated BLAST compatible local sequence aligner")
2605 (description
2606 "DIAMOND is a BLAST-compatible local aligner for mapping protein and
2607 translated DNA query sequences against a protein reference database (BLASTP
2608 and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
2609 reads at a typical sensitivity of 90-99% relative to BLAST depending on the
2610 data and settings.")
2611 (license license:agpl3+)))
2612
;; Discrover: built with cmake-build-system from a Git checkout. Extra phases
;; patch the documentation sources and a C++ source file, add missing
;; "#include <random>" lines, and set HOME before the build.
2613 (define-public discrover
2614 (package
2615 (name "discrover")
2616 (version "1.6.0")
2617 (source
2618 (origin
2619 (method git-fetch)
2620 (uri (git-reference
2621 (url "https://github.com/maaskola/discrover.git")
2622 (commit version)))
2623 (file-name (git-file-name name version))
2624 (sha256
2625 (base32
2626 "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
2627 (build-system cmake-build-system)
2628 (arguments
2629 `(#:tests? #f ; there are no tests
2630 #:phases
2631 (modify-phases %standard-phases
2632 (add-after 'unpack 'fix-latex-errors
2633 (lambda _
;; The bibliography entry is rewritten with %default-port-encoding bound
;; to #f; the text from "{S}illanp" up to the next comma is replaced by a
;; LaTeX-escaped spelling.
2634 (with-fluids ((%default-port-encoding #f))
2635 (substitute* "doc/references.bib"
2636 (("\\{S\\}illanp[^,]+,")
2637 "{S}illanp{\\\"a}{\\\"a},")))
2638 ;; XXX: I just can't get pdflatex to not complain about these
2639 ;; characters. They end up in the manual via the generated
2640 ;; discrover-cli-help.txt.
2641 (substitute* "src/hmm/cli.cpp"
2642 (("µ") "mu")
2643 (("η") "eta")
2644 (("≤") "<="))
2645 ;; This seems to be a syntax error.
2646 (substitute* "doc/discrover-manual.tex"
2647 (("theverbbox\\[t\\]") "theverbbox"))
2648 #t))
;; Append "#include <random>" right after the include-guard #define line
;; of two headers.
2649 (add-after 'unpack 'add-missing-includes
2650 (lambda _
2651 (substitute* "src/executioninformation.hpp"
2652 (("#define EXECUTIONINFORMATION_HPP" line)
2653 (string-append line "\n#include <random>")))
2654 (substitute* "src/plasma/fasta.hpp"
2655 (("#define FASTA_HPP" line)
2656 (string-append line "\n#include <random>")))
2657 #t))
2658 ;; FIXME: this is needed because we're using texlive-union, which
2659 ;; doesn't handle fonts correctly. It expects to be able to generate
2660 ;; fonts in the home directory.
2661 (add-before 'build 'setenv-HOME
2662 (lambda _ (setenv "HOME" "/tmp") #t)))))
2663 (inputs
2664 `(("boost" ,boost)
2665 ("cairo" ,cairo)
2666 ("rmath-standalone" ,rmath-standalone)))
;; The TeX Live components are combined into a single input with
;; texlive-union.
2667 (native-inputs
2668 `(("texlive" ,(texlive-union (list texlive-fonts-cm
2669 texlive-fonts-amsfonts
2670
2671 texlive-latex-doi
2672 texlive-latex-examplep
2673 texlive-latex-hyperref
2674 texlive-latex-ms
2675 texlive-latex-natbib
2676 texlive-bibtex ; style files used by natbib
2677 texlive-latex-pgf ; tikz
2678 texlive-latex-verbatimbox)))
2679 ("imagemagick" ,imagemagick)))
2680 (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
2681 (synopsis "Discover discriminative nucleotide sequence motifs")
2682 (description "Discrover is a motif discovery method to find binding sites
2683 of nucleic acid binding proteins.")
2684 (license license:gpl3+)))
2685
;; EIGENSOFT, built from the Makefile in "src" after the bundled binaries
;; have been removed from the checkout.
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DReichLab/EIG.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
       (modules '((guix build utils)))
       ;; Throw away the pre-built binaries and leave an empty "bin"
       ;; directory in their place.
       (snippet
        '(begin
           (delete-file-recursively "bin")
           (mkdir "bin")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;upstream has no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; Nothing to configure; just enter the sub-directory that holds
         ;; the Makefile.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; Upstream's install target merely copies the executables to the
         ;; "bin" directory of the build root, so install them from there.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "../bin" ".*"))
               #t))))))
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("perl" ,perl)
       ("gfortran" ,gfortran "lib")))
    (home-page "https://github.com/DReichLab/EIG")
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    ;; The eigensoft tools themselves are under Expat, but linking with the
    ;; GNU Scientific Library (GSL) makes the effective license the GPL.
    (license license:gpl3+)))
2744
;; Entrez Direct: only the "edirect.pl" script is installed.  It is wrapped
;; with the build-time PERL5LIB and smoke-tested after wrapping.
(define-public edirect
  (package
    (name "edirect")
    (version "12.1.20190829")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                           "/versions/" version
                           "/edirect-" version ".tar.gz"))
       (sha256
        (base32 "1xb330z28dgp7slrvp8r7rgncsasv9lpcpqim571yg728dq7xdik"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (delete 'check)                ;replaced by a check after install
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "edirect.pl" bin)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Let 'edirect.pl' find all of its Perl inputs at run time.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl"))
                   (perl5lib (getenv "PERL5LIB")))
               (wrap-program script
                 (list "PERL5LIB" ":" 'prefix (list perl5lib)))
               #t)))
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl")))
               (invoke script "-filter" "-help")
               #t))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl-xml-simple" ,perl-xml-simple)
       ("perl" ,perl)))
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2815
;; Exonerate, built from the EBI release tarball with the GNU build system.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2845
;; eXpress, built with CMake.  The build files are patched to link against
;; shared Boost and protobuf libraries and to use the bamtools input instead
;; of the "bamtools" directory of the source tree.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32 "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;upstream has no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Top-level build file: shared Boost, bamtools headers.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; Build file of the sources: bamtools libraries, shared
               ;; protobuf.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so"))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2890
;; Express Beta Diversity.  The Makefile lives in "source"; the binary ends
;; up in "../bin" relative to it and is installed by hand together with the
;; conversion script.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dparks1134/ExpressBetaDiversity.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; Run the freshly built executable with "-u" as the test.
         (replace 'check
           (lambda _
             (invoke "../bin/ExpressBetaDiversity" "-u")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (for-each (lambda (file)
                           (install-file file target))
                         '("../scripts/convertToEBD.py"
                           "../bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2928
;; FastTree is distributed as a single C file, so there is nothing to unpack
;; or configure: the source is compiled directly, once without and once
;; with OpenMP.
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into PROGRAM with the shared optimization
             ;; flags, preceded by any EXTRA-FLAGS.
             (let ((build-variant
                    (lambda (program . extra-flags)
                      (apply invoke "gcc"
                             (append extra-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o" program
                                           source
                                           "-lm"))))))
               (build-variant "FastTree")
               (build-variant "FastTreeMP" "-DOPENMP" "-fopenmp")
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2984
;; FASTX-Toolkit, built from the upstream release tarball with GCC 6.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     ;; Later GCC versions fail to build this release, hence GCC 6.
     `(("gcc" ,gcc-6)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
3017
;; Flexbar, built with CMake.  Only the "flexbar" executable is installed;
;; the test suite is upstream's shell script run against the fresh build.
(define-public flexbar
  (package
    (name "flexbar")
    (version "3.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/seqan/flexbar.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-tune-to-CPU
           (lambda _
             (substitute* "src/CMakeLists.txt"
               ((" -march=native") ""))
             #t))
         (replace 'check
           (lambda _
             ;; Put the build directory first on PATH, then run the test
             ;; script from the source tree.
             (setenv "PATH"
                     (string-append (getcwd) ":" (getenv "PATH")))
             (with-directory-excursion "../source/test"
               (invoke "bash" "flexbar_test.sh"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "flexbar"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "https://github.com/seqan/flexbar")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:bsd-3)))
3068
;; FragGeneScan.  Upstream looks for its training files next to the
;; executables; the sources are patched to read them from
;; share/fraggenescan/train instead, and the result is smoke-tested on the
;; bundled examples after installation.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             ;; SHARE ends with a slash, so path components appended to it
             ;; must not start with one.
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to the external tools by absolute file name.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _
             (invoke "make" "clean")
             (invoke "make" "fgs")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share))
             #t))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check
           ;; that the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               ;; Test complete genome.
               (invoke frag
                       "-genome=./example/NC_000913.fna"
                       "-out=./test2"
                       "-complete=1"
                       "-train=complete")
               (unless (and (file-exists? "test2.faa")
                            (file-exists? "test2.ffn")
                            (file-exists? "test2.gff")
                            (file-exists? "test2.out"))
                 (error "Expected files do not exist."))
               ;; Test incomplete sequences.
               (invoke frag
                       "-genome=./example/NC_000913-fgs.ffn"
                       "-out=out"
                       "-complete=0"
                       "-train=454_30")
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
3158
;; fxtract.  The "util" directory of the checkout is replaced by a pinned
;; checkout of ctSkennerton's "util" repository, supplied as a native input.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract.git")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Swap the "util" directory for the separately fetched sources.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
3223
;; GEMMA, built straight from its Makefile.  The make flags depend on the
;; system being built for, and only the "gemma" executable is installed.
(define-public gemma
  (package
    (name "gemma")
    (version "0.98")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/xiangzhou/GEMMA.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1s3ncnbn45r2hh1cvrqky1kbqq6546biypr4f5mkw1kqlrgyh0yg"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no tests included yet
       #:make-flags
       '(,@(match (%current-system)
             ("x86_64-linux"
              '("FORCE_DYNAMIC=1"))
             ("i686-linux"
              '("FORCE_DYNAMIC=1" "FORCE_32BIT=1"))
             (_
              '("FORCE_DYNAMIC=1" "NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'find-eigen
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the C++ compiler at the Eigen headers.
             (let ((eigen (assoc-ref inputs "eigen")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append eigen "/include/eigen3"))
               #t)))
         ;; Make sure "bin" exists before the build starts.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "bin/gemma"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("gfortran" ,gfortran "lib")
       ("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (home-page "https://github.com/xiangzhou/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (license license:gpl3)))
3283
;; GRIT, a Python 2 package.  Its C extension sources are regenerated from
;; the Cython sources at build time.
(define-public grit
  (package
    (name "grit")
    (version "2.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/grit.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda _
             ;; Remove the shipped C files so that they are generated anew
             ;; from the pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build"))
             #t)))))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (native-inputs
     `(("python-cython" ,python2-cython)))
    ;; As of 2020-01-21 the canonical home page <http://grit-bio.org> times
    ;; out, so point at the repository instead.
    (home-page "https://github.com/nboley/grit")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
3328
;; HISAT, built from the upstream zip archive with its plain Makefile.  The
;; Makefile is patched to use gcc/g++ and fixed build host/time values.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               ;; `substitute*' hands each line to the regexp together with
               ;; its trailing newline, and ".*$" consumes that newline.
               ;; The replacement therefore has to end in "\n", or the
               ;; patched line is fused with the following one.
               (("^CC = .*$") "CC = gcc\n")
               (("^CPP = .*$") "CPP = g++\n")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               ;; NOTE(review): these two patterns also swallow the rest of
               ;; the line, newline included.  They are left unchanged
               ;; because the Makefile is not visible here -- presumably the
               ;; lines end in a "\" continuation that is dropped together
               ;; with the newline; confirm before changing them.
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
3395
;; HISAT2, built from the upstream zip archive with its plain Makefile.  The
;; executables and the HTML manual are installed by hand.
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
                           "/downloads/hisat2-" version "-source.zip"))
       (sha256
        (base32 "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;upstream has no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         ;; Replace the `date` invocation in the Makefile by a constant.
         (add-after 'unpack 'make-deterministic
           (lambda _
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)                 ;needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ;for documentation
    (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; The HISAT2 source files are released under GPLv3 or later; the files
    ;; taken from Bowtie2 are under GPLv2 or later.
    (license license:gpl3+)))
3453
;; HMMER, built from the release tarball with the default GNU build phases.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/hmmer/hmmer-"
                           version ".tar.gz"))
       (sha256
        (base32 "171bivy6xhgjsz5nv53n81pc3frnwz29ylblawk2bv46szwjjqd5"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    ;; The non-portable SSE intrinsics used by hmmer make the build fail on
    ;; any other platform.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:bsd-3)))
3479
;; HTSeq, fetched from PyPI and built with the Python build system.
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32 "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; When htseq is used as a Python library, Numpy has to be available as
    ;; well, hence it is propagated.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://htseq.readthedocs.io/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
3505
;; Python 2 variant, derived from the `htseq' package.
(define-public python2-htseq (package-with-python2 htseq))
3508
;; HTSJDK, pinned to the last release that builds with Ant.  The bundled
;; "lib" directory is emptied, and the jars are installed by the build
;; target itself through the "dist" property.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Internet access
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3558
;; HTSJDK 2.14.3.  The upstream build.xml is deleted after unpacking and the
;; package is built through the ant build system's #:jar-name mechanism.
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              ;; Same "NAME-VERSION-checkout" string as before, spelled the
              ;; way java-htsjdk spells it.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3601
;; This is needed for picard 2.10.3
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    ;; The build system and the build arguments (tests disabled, icedtea-8,
    ;; "htsjdk.jar" as jar name, removal of build.xml and of src/test) are
    ;; exactly those of java-htsjdk-latest, so they are taken over through
    ;; 'inherit' instead of being spelled out a second time.
    ))
3628
;; This version matches java-htsjdk 2.3.0. Later versions also require a more
;; recent version of java-htsjdk, which depends on gradle.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the pre-built binaries; an empty "lib" directory is
           ;; re-created in their place.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk: strip the targets that compile a bundled
             ;; copy from the dependency lists.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" line)
              (string-append line "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         ;; Replace references to "${htsjdk}/lib" in build.xml with the jar
         ;; directory of the java-htsjdk input.
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk-jars (string-append (assoc-ref inputs "java-htsjdk")
                                               "/share/java/htsjdk/")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib") htsjdk-jars))
               #t)))
         ;; Drop "compile" from the dependencies of the "test" target.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3708
;; This is needed for dropseq-tools
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "picard.jar"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:jdk ,icedtea-8
       #:main-class "picard.cmdline.PicardCommandLine"
       ;; The sxml modules are used by 'edit-classpath-in-manifest' below to
       ;; read and rewrite build.xml.
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.  The phase parses build.xml, and for
         ;; every <target> whose name is "manifest" it appends two tasks: a
         ;; <replaceregexp> on the manifest file and an <echo> that appends
         ;; the Class-Path entry to it.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             (chmod "build.xml" #o664)
             (call-with-output-file "build.xml.new"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `((target . ,(lambda (tag . children)
                                  (let ((target-name ((sxpath '(name *text*))
                                                      (car children)))
                                        ;; FIXME: We're breaking the line
                                        ;; early with a dummy path to
                                        ;; ensure that the store reference
                                        ;; isn't broken apart and can still
                                        ;; be found by the reference
                                        ;; scanner.
                                        (class-path
                                         (format #f
                                                 "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                                 ;; maximum line length is 70
                                                 (make-string 57 #\b)
                                                 (assoc-ref inputs "java-htsjdk"))))
                                    (if (member "manifest" target-name)
                                        `(,tag ,@children
                                               (replaceregexp
                                                (@ (file "${manifest.file}")
                                                   (match "\\r\\n\\r\\n")
                                                   (replace "${line.separator}")))
                                               (echo
                                                (@ (message ,class-path)
                                                   (file "${manifest.file}")
                                                   (append "true"))))
                                        `(,tag ,@children)))))
                     ;; Every other element and all text nodes are passed
                     ;; through unchanged.
                     (*default* . ,(lambda (tag . children) `(,tag ,@children)))
                     (*text* . ,(lambda (_ txt) txt))))
                  port)))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3798
;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  (package
    (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries and leave an empty "lib" directory.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((ant-jar (string-append (assoc-ref inputs "ant")
                                           "/lib/ant.jar")))
               (substitute* "build.xml"
                 (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar") ant-jar))
               #t)))
         ;; Make "test" depend on "compile-tests" rather than "compile", and
         ;; make "compile" depend on "compile-src" only.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Replace the null default of "intel_deflater_so_path" with the
         ;; location the library is installed to by 'install-jni-lib'.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((deflater (string-append (assoc-ref outputs "out")
                                            "/lib/jni/libIntelDeflater.so")))
               (substitute* "src/java/net/sf/samtools/Defaults.java"
                 (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                  (string-append
                   "getStringProperty(\"intel_deflater_so_path\", \""
                   deflater
                   "\")")))
               #t)))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one.  This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "lib/jni")
             (mkdir-p "jdk-src")
             ;; Unpack the JDK sources into "jdk-src".
             (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                     "-xf" (assoc-ref inputs "jdk-src"))
             ;; Generate the JNI header for IntelDeflater into "lib/".
             (invoke "javah" "-jni"
                     "-classpath" "classes"
                     "-d" "lib/"
                     "net.sf.samtools.util.zip.IntelDeflater")
             ;; Compile, then link the shared library into "lib/jni".
             (with-directory-excursion "src/c/inteldeflater"
               (invoke "gcc" "-I../../../lib" "-I."
                       (string-append "-I" (assoc-ref inputs "jdk")
                                      "/include/linux")
                       "-I../../../jdk-src/src/share/native/common/"
                       "-I../../../jdk-src/src/solaris/native/common/"
                       "-c" "-O3" "-fPIC" "IntelDeflater.c")
               (invoke "gcc" "-shared"
                       "-o" "../../../lib/jni/libIntelDeflater.so"
                       "IntelDeflater.o" "-lz" "-lstdc++"))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "ant" "all" make-flags)
             #t))
         ;; JAVA6_HOME is set to the value of JAVA_HOME before building.
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni-dir (string-append (assoc-ref outputs "out")
                                           "/lib/jni")))
               (mkdir-p jni-dir)
               (install-file "lib/jni/libIntelDeflater.so" jni-dir)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant) ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ;; The JDK source drop is taken from icedtea-8's own native inputs.
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
3916
;; FastQC, built with ant from the upstream source zip.  The jars it refers
;; to in build.xml are replaced with the ones provided by the inputs.
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (string-append (assoc-ref inputs "java-jbzip2")
                               "/share/java/jbzip2.jar"))
               (("sam-1.103.jar")
                (string-append (assoc-ref inputs "java-picard-1.113")
                               "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (string-append (assoc-ref inputs "java-cisd-jhdf5")
                               "/share/java/sis-jhdf5.jar")))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (bin   (string-append out "/bin"))
                    ;; No trailing slash here: EXE below adds the separator
                    ;; itself, and a trailing slash would put "//" into the
                    ;; symlink target.
                    (share (string-append out "/share/fastqc"))
                    (exe   (string-append share "/fastqc")))
               (for-each mkdir-p (list bin share))
               (copy-recursively "bin" share)
               ;; Make the wrapper script call the java binary of the "java"
               ;; input by absolute file name.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl) ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
3995
;; fastp, built with plain "make" from a tagged git checkout.
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       ;; BINDIR is pointed at the "bin" directory of the output.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the BINDIR directory before the 'install' phase runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
4031
;; HTSlib, built from the upstream release tarball with the GNU build system.
(define-public htslib
  (package
    (name "htslib")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("curl" ,curl)
       ("openssl" ,openssl)))
    ;; zlib is propagated because the pkg-config file refers to it as a
    ;; required library.
    (propagated-inputs
     `(("zlib" ,zlib)))
    (home-page "https://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
4062
;; This package should be removed once no packages rely upon it.
;; It is not exported ('define' rather than 'define-public') and differs from
;; htslib only in its version and source.
(define htslib-1.3
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
4076
;; IDR (irreproducible discovery rate), built with the Python build system.
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       ;; Delete generated C code.
       (snippet
        '(begin
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    ;; There is only one test ("test_inv_cdf.py") and it tests features that
    ;; are no longer part of this package.  It also asserts False, which
    ;; causes the tests to always fail.
    (arguments
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
4112
;; Jellyfish k-mer counter.  The Ruby and Python bindings are installed into
;; separate outputs.
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Each binding is enabled with its own output as installation
       ;; location.
       (let ((ruby   (assoc-ref %outputs "ruby"))
             (python (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby)
               (string-append "--enable-python-binding=" python)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; The combined work is published under the GPLv3 or later.  Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
4166
;; khmer, built with the Python build system from a tagged git checkout.
(define-public khmer
  (package
    (name "khmer")
    (version "3.0.0a3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled copies of zlib, bzip2 and seqan; zlib,
           ;; bzip2 and seqan-1 are listed as inputs of this package.
           ;;
           ;; We do not replace the bundled MurmurHash as the canonical
           ;; repository for this code 'SMHasher' is unsuitable for providing
           ;; a library.  See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (for-each delete-file-recursively
                     '("third-party/zlib"
                       "third-party/bzip2"
                       "third-party/seqan"))
           ;; Enable the commented-out "libraries" line and drop the
           ;; deleted directories from the include path in setup.cfg.
           (substitute* "setup.cfg"
             (("# libraries = z,bz2")
              "libraries = z,bz2")
             (("include:third-party/zlib:third-party/bzip2")
              "include:"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure .gz files are writable so that the
             ;; 'reset-gzip-timestamps' phase can do its work.
             (let ((gz-files (find-files (assoc-ref outputs "out")
                                         "\\.gz$")))
               (for-each make-file-writable gz-files)
               #t))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("seqan" ,seqan-1)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
4236
;; Kaiju, built with "make" inside its "src" directory.
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Go back up from "src" and copy the contents of the top-level
         ;; "bin" directory into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p target)
               (chdir "..")
               (copy-recursively "bin" target))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
4274
;; MACS, built with the Python build system.  The test suite is run with
;; pytest against the installed package.
(define-public macs
  (package
    (name "macs")
    (version "2.2.6")
    (source (origin
              ;; The PyPi tarball does not contain tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/taoliu/MACS.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1c5gxr0mk6hkd4vclf0k00wvyvzw2vrmk52c85338p7aqjwg6n15"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Move the 'check' phase after 'install' so that the tests see the
         ;; installed modules.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honour #:tests? like the standard 'check' phase does, so
             ;; that the tests can still be switched off.
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v"))
             #t)))))
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcription factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
4312
;; MAFFT, built from the "without-extensions" source tarball.  The Makefile
;; in "core" is patched so that a few specialised programs are not built or
;; installed, and the installed programs are wrapped to find coreutils.
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern has to name
               ;; the same neighbours ("tbfast" and "f2cl") that the
               ;; replacement keeps, otherwise it cannot match.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               ;; NOTE(review): the leading ")" in this replacement looks
               ;; deliberate -- presumably this is the last line of a
               ;; parenthesised shell group in the install recipe, which
               ;; still has to be closed once the rest of the line is
               ;; commented out.  Verify against the upstream Makefile
               ;; before "simplifying" it to "#".
               (("^rm -f mafft-distance mafft-distance.exe") ")#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         ;; Replace the bare program names perl, awk and grep in the listed
         ;; files with the absolute file names found on PATH at build time.
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         ;; Wrap everything installed to "bin" so that the coreutils input
         ;; is prepended to PATH.
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "https://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
4398
;; Mash, built from the upstream git tag.  The kseq.h shipped in the source
;; tree is removed and the sources are pointed at htslib's copy instead.
(define-public mash
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of kseq.
           ;; TODO: The bundled murmurhash and open bloom filter should be
           ;; deleted as well.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test suite
       #:configure-flags
       ;; Each flag is paired with the input whose store path it receives.
       (map (lambda (flag input)
              (string-append flag (assoc-ref %build-inputs input)))
            '("--with-capnp=" "--with-gsl=")
            '("capnproto" "gsl"))
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; The snippet above removed kseq.h, so include the header
             ;; under its htslib path.
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; Building against capnproto 0.7 requires C++14.
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib end up statically embedded in the final
       ;; application, which is why their licenses are listed below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
4464
;; MetaBAT, built with SCons (Python 2) against the packaged samtools,
;; htslib and boost.
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
             (string-append "BOOST_ROOT=" (assoc-ref %build-inputs "boost")))
       ;; The build phase already runs the tests.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; Include the bam/sam headers from samtools and kseq.h from
             ;; htslib rather than from a "bam/" directory.
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-dir   (assoc-ref inputs "htslib"))
                   (samtools-dir (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib-dir "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools-dir "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools-dir "/lib'"))
                 ;; The README is not installed.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; Restricted to x86 because the source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
4537
;; MinCED: the JAR is installed into bin/ together with a small generated
;; shell launcher that runs it with the packaged JRE.
(define-public minced
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Adjust the expected test output to the latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install               ; there is no install target
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin      (string-append (assoc-ref outputs "out") "/bin"))
                    (launcher (string-append bin "/minced")))
               (install-file "minced.jar" bin)
               ;; Upstream ships a wrapper script that works out its own
               ;; location before starting the JAR.  All paths are known
               ;; here, so a custom wrapper is written instead, which also
               ;; avoids a dependency on coreutils.
               (call-with-output-file launcher
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin "/minced.jar \"$@\"\n")
                    port)))
               (chmod launcher #o555))
             #t)))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
4594
;; MISO, fetched from PyPI ("misopy") and built with Python 2.  The source
;; snippet patches setup.py before the build.
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32 "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(let ((gcc-setup "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           (substitute* "setup.py"
             ;; Without setuptools the executables do not get installed.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines") gcc-setup))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; Python 2 is the only supported version
       #:tests? #f))                    ; there is no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (home-page "https://www.genes.mit.edu/burgelab/miso/index.html")
    (license license:gpl2)))
4642
;; MUSCLE, built straight from its Makefile (no configure script) and
;; installed by hand as a single binary.
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (sha256
        (base32 "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; Upstream has no tests; merely check that the binary starts.
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; The license statement is found in the output of 'muscle -h' and in
    ;; usage.cpp.
    (license license:public-domain)))
4677
(define-public newick-utils
  ;; Upstream has not made a release recently, so a git snapshot is packaged.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87")
        (revision "1"))
    (package
      (name "newick-utils")
      ;; Version string: base version, snapshot revision, and the first
      ;; eight characters of the commit.
      (version (string-append "1.6-" revision "." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4711
;; OrfM, built from the upstream release tarball with the default
;; gnu-build-system phases; Ruby and RSpec are build-time-only inputs.
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/wwood/OrfM/releases/download/v"
                           version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4736
;; pbcore from PyPI, built with Python 2.  Sphinx is stripped from the
;; development requirements before the build.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; pbcore < 2.0 requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-sphinx-dependency
           (lambda _
             ;; Only the documentation tests need Sphinx, and those are not
             ;; run.  Moreover Sphinx would pull in python2-sphinx, which is
             ;; no longer maintained.
             (substitute* "requirements-dev.txt"
               (("^sphinx") ""))
             #t)))))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "https://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
4773
;; WarpedLMM from PyPI; the release is a zip archive, hence the unzip
;; native input.
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; Python 2.7 is required
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
4806
(define-public pbtranscript-tofu
  ;; Packaged from a fixed commit of the cDNA_primer repository; the Python
  ;; package lives in the pbtranscript-tofu/pbtranscript/ sub-directory.
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete the bundled Cython source tarball.
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               ;; This setup.py hack triggers a build error with setuptools
               ;; 18.0 and later, so the branch is turned off.
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
4863
;; PRANK, built from the src/ sub-directory of the tarball.  The installed
;; binary is wrapped so that mafft, exonerate and bppsuite are on its PATH.
(define-public prank
  (package
    (name "prank")
    (version "170427")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://wasabiapp.org/download/prank/prank.source."
                           version ".tgz"))
       (sha256
        (base32 "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         (add-after 'unpack 'remove-m64-flag
           ;; Without "-m64" Prank still builds with the correct 'bit-ness',
           ;; and dropping the flag makes 32-bit builds possible.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (mandir (string-append out "/share/man/man1"))
                    ;; Colon-separated bin/ directories of the helper tools.
                    (tools  (string-join
                             (map (lambda (input)
                                    (string-append
                                     (assoc-ref %build-inputs input) "/bin"))
                                  '("mafft" "exonerate" "bppsuite"))
                             ":")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,tools)))
               (install-file "prank.1" mandir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
4921
;; Proteinortho, built from the upstream git tag with the pre-built scripts
;; removed.  Installed scripts are wrapped with the build-time PATH.
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "6.0.14")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the scripts that upstream ships pre-built.
           (delete-file-recursively "src/BUILD/")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; In the absence of a configure script the Makefile is edited
           ;; directly.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; 'make install' does not create the install directory itself.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (path    (getenv "PATH"))
                    ;; The main program plus every installed Perl and
                    ;; Python script.
                    (scripts (cons (string-append out "/bin/proteinortho")
                                   (find-files out "\\.(pl|py)$"))))
               (for-each (lambda (script)
                           (wrap-script script `("PATH" ":" prefix (,path))))
                         scripts))
             #t)))))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrap-script
       ("diamond" ,diamond)
       ("perl" ,perl)
       ("python" ,python-wrapper)
       ("blast+" ,blast+)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (native-inputs
     `(("which" ,which)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl3+)))
4987
;; Pyicoteo, built from the upstream git tag with Python 2; no tests are run.
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; Python 3 does not work
       #:tests? #f))                    ; upstream provides no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently six
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
5026
;; Prodigal, built from the upstream git tag using its plain Makefile; the
;; install location is passed to make through INSTALLDIR.
(define-public prodigal
  (package
    (name "prodigal")
    ;; When updating this package, check whether the home page has moved:
    ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; the Makefile has no check target
       #:make-flags
       (let ((bindir (string-append (assoc-ref %outputs "out") "/bin")))
         (list (string-append "INSTALLDIR=" bindir)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/hyattpd/Prodigal")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
5059
;; Roary, fetched from CPAN.  Nothing is configured or built: the test files
;; are run directly, bin/ and lib/ are copied into the output, and the
;; installed programs are wrapped with the environment they need at run time.
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The source tree's bin/ and lib/ are put first on
             ;; PATH and PERL5LIB so the tests see the programs and modules
             ;; from the checkout.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))
             #t))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Every program from the source tree's bin/ whose name does
               ;; not end in "R" gets PERL5LIB and PATH.  FIND-FILES returns
               ;; names relative to the build directory ("bin/..."), which
               ;; are re-rooted under OUT.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script is wrapped separately with
               ;; R_LIBS_SITE and a PATH containing coreutils.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "https://sanger-pathogens.github.io/Roary/")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
5176
;; RAxML, built from the upstream git tag with Makefile.HYBRID.gcc.  The
;; resulting "raxmlHPC-HYBRID" binary is installed by hand and is also made
;; available under the shorter name "raxml".
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests.
       ;; Use the plain HYBRID Makefile rather than the SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Create the "raxml" alias inside BIN.  A bare "raxml" as
               ;; the link name would be resolved against the current
               ;; (build) directory, so the link would never reach the
               ;; package output.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
5217
;; RSEM, built from the upstream git tag against the packaged boost and
;; htslib 1.3; the bundled copies of boost and samtools are removed from the
;; source.
(define-public rsem
  (package
    (name "rsem")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM.git")
             (commit (string-append "v" version))))
       (sha256
        (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
       (file-name (git-file-name name version))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled boost and samtools sources.
           (delete-file-recursively "boost")
           (delete-file-recursively "samtools-1.3")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no "check" target
       #:make-flags
       (let ((boost  (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "BOOST=" boost "/include/")
               (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
               (string-append "SAMLIBS=" htslib "/lib/libhts.a")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  Instead, the Makefile is
         ;; edited so that the bundled samtools library is not built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               (("^\\$\\(SAMLIBS\\).*") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bindir  (string-append out "/bin/"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               ;; Install every file whose name matches "rsem-.*", plus the
               ;; Perl utility module.
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (perl5lib (string-append out "/lib/perl5/site_perl")))
               ;; Put the installed Perl module directory on PERL5LIB for
               ;; each of the programs listed below.
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perl5lib))))
                         '("rsem-calculate-expression"
                           "rsem-control-fdr"
                           "rsem-generate-data-matrix"
                           "rsem-generate-ngvector"
                           "rsem-plot-transcript-wiggles"
                           "rsem-prepare-reference"
                           "rsem-run-ebseq"
                           "rsem-run-prsem-testing-procedure")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("htslib" ,htslib-1.3)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
5306
(define-public rseqc
  (package
    (name "rseqc")
    (version "3.0.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are fetched through the SourceForge mirror scheme.
       (uri (string-append "mirror://sourceforge/rseqc/RSeQC-"
                           version ".tar.gz"))
       (sha256
        (base32 "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-cython" ,python-cython)
       ("python-bx-python" ,python-bx-python)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
5339
(define-public seek
  ;; Upstream publishes no release tarballs.  The installation instructions
  ;; at http://seek.princeton.edu/installation.jsp identify the latest
  ;; stable release by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source (origin
                (method hg-fetch)
                (uri (hg-reference
                      (url "https://bitbucket.org/libsleipnir/sleipnir")
                      (changeset changeset)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of "tools/" that are built and installed with
         ;; their own "make" invocations in the extra phases below.
         (let ((tool-dirs (map (lambda (tool)
                                 (string-append "tools/" tool))
                               '("SeekMiner"
                                 "SeekEvaluator"
                                 "SeekPrep"
                                 "Distancer"
                                 "Data2DB"
                                 "PCL2Bin"))))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 ;; Replace the "/usr/bin/env" reference in gen_tools_am
                 ;; with the perl found on PATH, then run gen_auto.
                 (substitute* "gen_tools_am"
                   (("/usr/bin/env.*") (which "perl")))
                 (invoke "bash" "gen_auto")
                 #t))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion dir
                               (apply invoke "make" make-flags)))
                           tool-dirs)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion dir
                               (apply invoke "make"
                                      (append make-flags '("install")))))
                           tool-dirs)
                 #t))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
5412
(define-public samtools
  (package
    (name "samtools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
       (modules '((guix build utils)))
       (snippet '(begin
                   ;; Remove the htslib copy shipped in the tarball; the
                   ;; "htslib" input is used instead.
                   (delete-file-recursively "htslib-1.9")
                   #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; test/test.pl refers to /bin/bash; use the bash on PATH.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         ;; Also install the static library ...
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "libbam.a"
                           (string-append (assoc-ref outputs "out") "/lib"))
             #t))
         ;; ... and every "*.h" file of the top-level source directory.
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/include/samtools/"))
                   (header? (lambda (name)
                              (string-match "\\.h$" name))))
               (for-each (lambda (header)
                           (install-file header destination))
                         (scandir "." header?))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5474
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and it is still
  ;; used in many bioinformatics pipelines.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "LIBCURSES=-lncurses")
       ;; Reuse the arguments of "samtools", adjusting only its phases.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:phases phases)
            `(modify-phases ,phases
               (delete 'configure)
               (delete 'patch-tests)
               ;; Install only the "samtools" executable.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let ((bindir (string-append
                                  (assoc-ref outputs "out") "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t))))))))))
5505
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source (origin
                ;; There are no release tarballs nor tags.
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/wanpinglee/MOSAIK.git")
                      (commit commit)))
                ;; Name the checkout with 'git-file-name', like the other
                ;; git-fetch origins in this file.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a "configure" script, enter the "src"
           ;; directory so that later phases run there.
           (replace 'configure
             (lambda _ (chdir "src") #t))
           ;; Copy the contents of "../bin" (relative to "src") to the
           ;; output's "bin" directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out")
                                         "/bin")))
                 (mkdir-p bin)
                 (copy-recursively "../bin" bin)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5553
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "2.9.6")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ncbi/ngs.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0d5k5kabgl15as37kj9x65xc92j4gcqms86hvihw3yb6wag0r0q3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Work inside the "ngs-sdk" sub-directory of the checkout.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the current directory on PERL5LIB so that
             ;; 'konfigure.perl' can find 'package.prl'.
             (setenv "PERL5LIB"
                     (string-append ".:" (getenv "PERL5LIB")))
             ;; Call the 'configure' script by hand: it doesn't recognize
             ;; things like '--enable-fast-install'.
             (invoke "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix="
                                    (assoc-ref outputs "out")))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;; unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
5601
(define-public java-ngs
  (package (inherit ngs-sdk)
    (name "java-ngs")
    (synopsis "Java bindings for NGS SDK")
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (arguments
     ;; Start from the arguments of "ngs-sdk", prepend a #:modules list,
     ;; and make the 'enter-dir' phase enter "ngs-java" instead.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _ (chdir "ngs-java") #t))))))))
5619
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.9.6")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ncbi/ncbi-vdb.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0knkj1sq34hlivgv5qd6jlczqrs3ldmfgn6vbbw7p4mqxvb9mirk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags '("HAVE_HDF5=1")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _
             (for-each make-file-writable (find-files "." ".*"))
             #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         ;; See https://github.com/ncbi/ncbi-vdb/issues/14
         (add-after 'unpack 'patch-krypto-flags
           (lambda _
             (substitute* "libs/krypto/Makefile"
               (("-Wa,-march=generic64\\+aes") "")
               (("-Wa,-march=generic64\\+sse4") ""))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (substitute* "setup/package.prl"
                 ;; Override include path for libmagic
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'"))
                 ;; Override search path for ngs-java
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Insert "-msse2 " right after "CFLAGS =".
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))
               #t)))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir ilib)
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include")))
             #t))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((kfg-dir (string-append (assoc-ref outputs "out")
                                           "/kfg")))
               (mkdir kfg-dir)
               (for-each (lambda (file)
                           (install-file file kfg-dir))
                         '("libs/kfg/default.kfg"
                           "libs/kfg/certs.kfg")))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5737
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install just the "plink" executable.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "plink"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5790
(define-public plink-ng
  (package (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02npdwgkpfkdnhw819rhj5kw02a5k5m90b14zq9zzya4hyg929c0"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       ;; Build with "Makefile.std", passing BLAS, compiler and zlib
       ;; settings on the "make" command line.
       #:make-flags (list "BLASFLAGS=-llapack -lopenblas"
                          "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
                          "ZLIB=-lz"
                          "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         ;; Later phases run inside the "1.9" sub-directory.
         (add-after 'unpack 'chdir
           (lambda _ (chdir "1.9") #t))
         (delete 'configure)            ; no "configure" script
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "plink"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
5828
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; Refer to "sam.h" under the "samtools/" include prefix.
               ;; The dot is escaped so that only a literal "sam.h" is
               ;; rewritten, not "sam" + any character + "h".
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           ;; Install all object files to lib/ and all ".hpp" headers to
           ;; include/smithlab-cpp.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (lib     (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
5881
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/preseq/"
                                  "releases/download/v" version
                                  "/preseq_v" version ".tar.bz2"))
              (sha256
               (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Drop the samtools copy shipped in the tarball.
                          (delete-file-recursively "samtools")
                          #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; Tell the Makefile where to install and where to find libbam.a
       ;; and smithlab-cpp in the inputs.
       (let ((samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
5931
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Tests must be run after installation, as the "screed" command does
         ;; not exist right after building.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Let python-build-system compute the site-packages directory
             ;; instead of slicing a fixed number of characters off the
             ;; "python" store path, which only works for versions of the
             ;; form X.Y.Z with single-digit components.
             (add-installed-pythonpath inputs outputs)
             ;; Make the freshly installed "screed" command available.
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out") "/bin:"
                                    (getenv "PATH")))
             (invoke "python" "setup.py" "test")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
5975
;; Variant of python-screed obtained by passing it to 'package-with-python2'.
5976 (define-public python2-screed
5977 (package-with-python2 python-screed))
5978
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.9.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0vqzap68v81k0zif2mnqfy8pnw2nrhsg87p6mgq8qk3nk2jv2rgy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       ;; Take the default configuration files and the library directory
       ;; from the "ncbi-vdb" input.
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               (string-append "VDB_LIBDIR=" vdb
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((vdb (assoc-ref inputs "ncbi-vdb")))
               ;; The build system expects a directory containing the
               ;; sources and raw build output of ncbi-vdb, including files
               ;; that are not installed.  Since we are building against an
               ;; installed version of ncbi-vdb, the following modifications
               ;; are needed.
               (substitute* "setup/konfigure.perl"
                 ;; Make the configure script look for the "ilib" directory
                 ;; of "ncbi-vdb" without first checking for the existence
                 ;; of a matching library in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Look for interface libraries in ncbi-vdb's "ilib"
                 ;; directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Dynamic linking
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" vdb)
                       (string-append "--with-ncbi-vdb-build=" vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))
               #t))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page
     "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
6078
(define-public seqan
  (package
    (name "seqan")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/seqan/seqan/releases/"
                                  "download/seqan-v" version
                                  "/seqan-library-" version ".tar.xz"))
              (sha256
               (base32
                "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar-dir (assoc-ref %build-inputs "tar"))
               (xz-dir  (assoc-ref %build-inputs "xz"))
               (tarball (assoc-ref %build-inputs "source"))
               (out     (assoc-ref %outputs "out"))
               (doc     (assoc-ref %outputs "doc")))
           ;; Only "tar" and "xz" are put on PATH for unpacking.
           (setenv "PATH" (string-append tar-dir "/bin:" xz-dir "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", the "share" directory goes to "doc".
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
6123
(define-public seqan-1
  (package (inherit seqan)
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar-dir   (assoc-ref %build-inputs "tar"))
               (bzip2-dir (assoc-ref %build-inputs "bzip2"))
               (tarball   (assoc-ref %build-inputs "source"))
               (out       (assoc-ref %outputs "out"))
               (doc       (assoc-ref %outputs "doc")))
           ;; Only "tar" and "bzip2" are put on PATH for unpacking.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip2-dir "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", the "share" directory goes to "doc".
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))))
6158
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source
     (origin
       (method url-fetch)
       ;; Fetched from PyPI.
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32 "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-biopython" ,python-biopython)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
6184
;; seqtk 1.3 from the upstream git tag.  There is no configure step and the
;; install phase is replaced by a manual copy of the single executable.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/seqtk.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.
           (lambda _
             (invoke "./seqtk" "seq")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy the freshly built executable into OUT/bin/.
             (install-file "seqtk"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
6220
;; SNAP 1.0beta.18 from the upstream git tag.  There is no configure step;
;; the check phase runs the project's own `unit_tests' executable and the
;; install phase copies the two executables by hand.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/amplab/snap.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check (lambda _ (invoke "./unit_tests") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library rather than a build-time tool, so it is declared as
    ;; a regular input; as a native input a cross-build would pick up the
    ;; build machine's zlib instead of the target's.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
6261
;; SortMeRNA 2.1b from the upstream git tag.  The package has two outputs:
;; the executables land in "out", the bundled FASTA databases in "db".
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biocore/sortmerna.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out")
                                           "/bin"))
                    (dbdir  (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases")))
               ;; The two executables built in the source root.
               (install-file "sortmerna" bindir)
               (install-file "indexdb_rna" bindir)
               ;; Every file under rRNA_databases whose name matches
               ;; ".*fasta" is copied flat into the "db" output.
               (for-each (lambda (fasta)
                           (install-file fasta dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6308
;; STAR 2.7.1a from the upstream git tag.  The source snippet strips the
;; pre-built binaries and the bundled htslib; the build phases then rewrite
;; the Makefile and the #include lines so that the htslib input is used.
(define-public star
  (package
    (name "star")
    (version "2.7.1a")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/alexdobin/STAR.git")
             (commit version)))
       ;; Same "NAME-VERSION-checkout" name as before, spelled with the
       ;; helper used by the other git checkouts in this file.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6g4s4hgw7qygs1z97j7a2dgz8gfaa4cv5pjvvvmarvk0x07hyg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Remove pre-built binaries and bundled htslib sources.
           (delete-file-recursively "bin/MacOSX_x86_64")
           (delete-file-recursively "bin/Linux_x86_64")
           (delete-file-recursively "bin/Linux_x86_64_static")
           (delete-file-recursively "source/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         ;; All later phases operate from within the "source" directory.
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         ;; Replace whatever the Makefile records as compilation time/place
         ;; with a fixed string.
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             #t))
         ;; See https://github.com/alexdobin/STAR/pull/562
         (add-after 'enter-source-dir 'add-missing-header
           (lambda _
             (substitute* "SoloReadFeature_inputRecords.cpp"
               (("#include \"binarySearch2.h\"" h)
                (string-append h "\n#include <math.h>")))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the Depend.list target.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled copy into system
             ;; includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot before "h" is escaped so that only a literal ".h"
             ;; suffix matches, like in the pattern above.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
6396
;; Variant of `star' that builds and installs the "STARlong" make target
;; instead of "STAR"; all other fields are inherited.
(define-public starlong
  (package
    (inherit star)
    (name "starlong")
    (arguments
     (substitute-keyword-arguments (package-arguments star)
       ;; The inherited flags are discarded; only the make target is passed.
       ((#:make-flags flags)
        `(list "STARlong"))
       ((#:phases phases)
        `(modify-phases ,phases
           ;; Allow extra long sequence reads.
           (add-after 'unpack 'make-extra-long
             (lambda _
               ;; The path is given relative to the checkout root.
               (substitute* "source/IncludeDefine.h"
                 (("(#define DEF_readNameLengthMax ).*" _ match)
                  (string-append match "900000\n")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "STARlong"
                             (string-append (assoc-ref outputs "out")
                                            "/bin/"))
               #t))))))))
6418
;; Subread 1.6.0 from the SourceForge source tarball.  The build runs
;; Makefile.Linux from the "src" directory; the install phase copies the
;; sibling "bin" directory into the output.
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/subread/subread-"
                           version "/subread-" version "-source.tar.gz"))
       (sha256
        (base32
         "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let ((target ,(or (%current-target-system)
                                (%current-system)))
                   (common '("-ggdb" "-fomit-frame-pointer"
                             "-ffast-math" "-funroll-loops"
                             "-fmessage-length=0"
                             "-O9" "-Wall" "-DMAKE_FOR_EXON"
                             "-DMAKE_STANDALONE"
                             "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (x86-64-only '("-mmmx" "-msse" "-msse2" "-msse3")))
               ;; One CCFLAGS=... assignment; the SIMD switches are appended
               ;; only when the system string starts with "x86_64".
               (string-append
                "CCFLAGS="
                (string-join (if (string-prefix? "x86_64" target)
                                 (append common x86-64-only)
                                 common))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p bindir)
               (copy-recursively "../bin" bindir))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
6473
;; StringTie 1.2.1 from the upstream tarball.  The bundled samtools-0.1.18
;; directory is deleted from the source and the build is pointed at the
;; `samtools-0.1' input instead.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32
         "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; The "stringtie" target no longer depends on the bundled
             ;; libbam.a.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Quoted includes of bam.h, sam.h and kstring.h become
             ;; <samtools/...> includes.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "stringtie"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
6535
;; Taxtastic 0.8.11 from the upstream git tag.  Two extra phases patch the
;; checkout after unpacking, and the check phase runs the unittest runner.
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.11")
    (source
     (origin
       ;; The Pypi version does not include tests.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/fhcrc/taxtastic.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'prepare-directory
           (lambda _
             ;; The git checkout must be writable for tests.
             (for-each make-file-writable (find-files "."))
             ;; This test fails, but the error is not caught by the test
             ;; framework, so the tests fail...
             (substitute* "tests/test_taxit.py"
               (("self.cmd_fails\\(''\\)")
                "self.cmd_fails('nothing')"))
             ;; This version file is expected to be created with git describe.
             (mkdir-p "taxtastic/data")
             (call-with-output-file "taxtastic/data/ver"
               (lambda (port)
                 (display ,version port)))
             #t))
         (add-after 'unpack 'python37-compatibility
           (lambda _
             ;; Import errno directly instead of reaching it via os.errno.
             (substitute* "taxtastic/utils.py"
               (("import csv") "import csv, errno")
               (("os.errno") "errno"))
             #t))
         (replace 'check
           ;; Note, this fails to run with "-v" as it tries to write to a
           ;; closed output stream.
           (lambda _
             (invoke "python" "-m" "unittest")
             #t)))))
    (propagated-inputs
     `(("python-sqlalchemy" ,python-sqlalchemy)
       ("python-decorator" ,python-decorator)
       ("python-biopython" ,python-biopython)
       ("python-pandas" ,python-pandas)
       ("python-psycopg2" ,python-psycopg2)
       ("python-fastalite" ,python-fastalite)
       ("python-pyyaml" ,python-pyyaml)
       ("python-six" ,python-six)
       ("python-jinja2" ,python-jinja2)
       ("python-dendropy" ,python-dendropy)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
6596
;; VCFtools 0.1.16 from the GitHub release tarball, built with the standard
;; GNU phases; only the make flags are customized.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32
         "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       ;; PREFIX and MANDIR both point into the "out" output.
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
6632
;; Infernal 1.1.3 from the upstream tarball, built with the unmodified GNU
;; build system.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32
         "0pm8bm3s6nfa0av4x6m6h27lsg12b3lz3jm0fyh1mc77l2isd61v"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)
       ("python" ,python)))             ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description
     "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.3 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
6661
;; SCDE 1.99.2, taken from the upstream git repository and built with the
;; R build system.
(define-public r-scde
  (package
    (name "r-scde")
    (version "1.99.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hms-dbmi/scde.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-mgcv" ,r-mgcv)
       ("r-rook" ,r-rook)
       ("r-rjson" ,r-rjson)
       ("r-cairo" ,r-cairo)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-edger" ,r-edger)
       ("r-quantreg" ,r-quantreg)
       ("r-nnet" ,r-nnet)
       ("r-rmtstat" ,r-rmtstat)
       ("r-extremes" ,r-extremes)
       ("r-pcamethods" ,r-pcamethods)
       ("r-biocparallel" ,r-biocparallel)
       ("r-flexmix" ,r-flexmix)))
    (home-page "https://hms-dbmi.github.io/scde/")
    (synopsis "R package for analyzing single-cell RNA-seq data")
    (description
     "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
    ;; See https://github.com/hms-dbmi/scde/issues/38
    (license license:gpl2)))
6703
;; CENTIPEDE 1.2, fetched from R-Forge.  No inputs are declared.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32
         "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
6727
;; genefilter 1.68.0 from Bioconductor.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.68.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genefilter" version))
       (sha256
        (base32 "1xjr02qhldspjwd6y374wgik18fgywb6408wsz471i8b4ik98ckc"))))
    (build-system r-build-system)
    ;; NOTE(review): gfortran is presumably needed to compile bundled Fortran
    ;; sources -- confirm.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-survival" ,r-survival)))
    (home-page "https://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
6754
;; DESeq2 1.26.0 from Bioconductor.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq2" version))
       (sha256
        (base32 "1lmbhznfs8dz9ipd53z4ccwvwxqwzx1ayw56jlrvlsambaj8fash"))))
    ;; Record the upstream (mixed-case) spelling of the package name.
    (properties `((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
6791
;; DEXSeq 1.32.0 from Bioconductor.
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DEXSeq" version))
       (sha256
        (base32 "0sqqb65ckliif2nmvlvc7w49id59z0nvqcdz5gry8l2mn6azrf6a"))))
    ;; Record the upstream (mixed-case) spelling of the package name.
    (properties `((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allows the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (license license:gpl3+)))
6834
;; AnnotationForge 1.28.0 from Bioconductor.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationForge" version))
       (sha256
        (base32 "0h2r60v339ajk5r6xq4iwwcpihdvf12fi8255byr2dhglzrd8xl7"))))
    ;; Record the upstream (mixed-case) spelling of the package name.
    (properties `((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
6864
;; RBGL 1.62.1 from Bioconductor.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.62.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RBGL" version))
       (sha256
        (base32 "0ixbkdirf08i400db587r262j52sdwdgk3884nxk5xkbhx2m6vg0"))))
    ;; Record the upstream (upper-case) spelling of the package name.
    (properties `((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bh" ,r-bh)
       ("r-graph" ,r-graph)))
    (home-page "https://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
6887
;; GSEABase 1.48.0 from Bioconductor.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GSEABase" version))
       (sha256
        (base32 "0kcb90sxlf19d5dxhdbqk9x62svky4262cccl0wxarbq6gf3bd57"))))
    ;; Record the upstream (mixed-case) spelling of the package name.
    (properties `((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
6914
;; Category 2.52.1 from Bioconductor.
(define-public r-category
  (package
    (name "r-category")
    (version "2.52.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Category" version))
       (sha256
        (base32 "0ga0ij9hyzsxa5pavwmv35i8xggia2wygrk4m4z8an0qcvgy3v5g"))))
    ;; Record the upstream (capitalized) spelling of the package name.
    (properties `((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (home-page "https://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
6945
;; GOstats 2.52.0 from Bioconductor.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GOstats" version))
       (sha256
        (base32 "19f4gxm3sbprqrnwzvskvywv6j4ibm8xkrbgg4h0fvh2b5331nwc"))))
    ;; Record the upstream (mixed-case) spelling of the package name.
    (properties `((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (home-page "https://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
6976
;; ShortRead 1.44.3 from Bioconductor.  Besides its R dependencies it takes
;; zlib as a regular input.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.44.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ShortRead" version))
       (sha256
        (base32 "0ykyrj4g6vc67d5s46sp4659qvar2iavflzhggm79w4p50hxia4s"))))
    ;; Record the upstream (mixed-case) spelling of the package name.
    (properties `((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
7018
;; systemPipeR 1.20.0 from Bioconductor.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "systemPipeR" version))
       (sha256
        (base32 "17r25v9wcglyma2v1c8fka80dm7fx86saxlsb2aprmwi4h3dhb0j"))))
    ;; Record the upstream (mixed-case) spelling of the package name.
    (properties `((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchtools" ,r-batchtools)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-yaml" ,r-yaml)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
7065
;; groHMM: Bioconductor pipeline for GRO-seq data analysis.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "0ywr8f6bfhg2ia3n4rmsxr4v1xqmlyhkwh0rvkgh3yqgnbvihndg"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "groHMM")))
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "GRO-seq analysis pipeline")
    (description "This package provides a pipeline for the analysis of GRO-seq data.")
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (license license:gpl3+)))
7092
;; VSEARCH: sequence search, clustering and related tools for metagenomics,
;; built against the system cityhash rather than the bundled copy.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/torognes/vsearch.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Drop the bundled cityhash sources; the patch above adjusts
           ;; the vsearch source accordingly.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    ;; vsearch relies on non-portable SSE intrinsics, so the build fails on
    ;; any other platform.
    (supported-systems '("x86_64-linux"))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; Dual licensed; the source also includes public domain code.
    (license (list license:gpl3 license:bsd-2))))
7138
;; ParDRe: MPI-based tool that removes duplicate DNA reads.
(define-public pardre
  (package
    (name "pardre")
    ;; Upstream changed the 1.1.5 tarball in place, hence the "-1" suffix.
    (version "1.1.5-1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                                  "1.1.5" ".tar.gz"))
              (sha256
               (base32
                "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (arguments
     '(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the "ParDRe" executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (install-file "ParDRe" bindir)
               #t))))))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
7179
;; Bio::Kseq: Ruby bindings for the kseq.h FASTA/FASTQ parser.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments
     '(#:test-target "spec"))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
7207
;; bio-locus: tabix-like tool for querying genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
7231
;; bio-blastxmlparser: BLAST XML parser usable as a library or as a
;; command-line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is a test framework, so it is declared as a native input like
    ;; in the other Ruby packages of this file (bio-locus, ruby-bio-kseq).
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
7255
;; BioRuby: Ruby library, shell and utilities for bioinformatics.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The command tests refer to programs by fixed names such as
         ;; /bin/sh; replace them with the locations found by `which'.
         (add-before 'build 'patch-test-command
           (lambda _
             (substitute* '("test/functional/bio/test_command.rb")
               (("/bin/sh") (which "sh")))
             (substitute* '("test/functional/bio/test_command.rb")
               (("/bin/ls") (which "ls")))
             ;; This rewrites every occurrence of "which" in the file.
             (substitute* '("test/functional/bio/test_command.rb")
               (("which") (which "which")))
             ;; Plain quoted list of two files.  A stray comma used to sit
             ;; between the elements; it read as `unquote' and only worked
             ;; because the surrounding `arguments' form is quasiquoted.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
7298
;; biocViews: categorizes Bioconductor packages by controlled-vocabulary
;; keywords ("views").
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32
         "0yn8jys1900d31haayz0ppqk5y79mwjajwp4alz6pln3dbs70f3g"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "biocViews")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocmanager" ,r-biocmanager)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (synopsis "Bioconductor package categorization helper")
    (description
     "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (home-page "https://bioconductor.org/packages/biocViews")
    (license license:artistic2.0)))
7326
;; BiocStyle: standard formatting styles for Bioconductor documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.14.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32
         "1x71in059zql40f4c87bd1gf96r945kdvwbq61jmch9d3d8nwxbb"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "BiocStyle")))
    (propagated-inputs
     `(("r-biocmanager" ,r-biocmanager)
       ("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (synopsis "Bioconductor formatting styles")
    (description
     "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (license license:artistic2.0)))
7352
;; BiocCheck: additional quality checks for R packages that are to be
;; submitted to Bioconductor.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocCheck" version))
              (sha256
               (base32
                "1qnvl5yajgh67ijkq6gdsafri1k5hyw5gzn2ccqk9ymx6i2xd80g"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the file names appended
                    ;; below already start with one, and a trailing slash
                    ;; would yield "script//..." symlink targets.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Expose both scripts through symlinks in the bin directory.
               (symlink (string-append script-dir "/checkBadDeps.R")
                        (string-append dest-dir "/checkBadDeps.R"))
               (symlink (string-append script-dir "/BiocCheck")
                        (string-append dest-dir "/BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-knitr" ,r-knitr)
       ("r-optparse" ,r-optparse)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7399
;; S4Vectors: the Vector and List virtual classes and related generics.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.24.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32
         "01f7dms4kw9ajwqlvh5s47riv748xrrs41na03byhjvn4fbdc44y"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "S4Vectors")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "S4 implementation of vectors and lists")
    (description "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (license license:artistic2.0)))
7426
;; IRanges: S4 classes for integer ranges, RLE vectors and Vector views.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.20.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32
         "1jhnxb9yacmj2z82b6992gihjvj1a0gnjwbjiagyyx03fqnv23kg"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "IRanges")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (home-page "https://bioconductor.org/packages/IRanges")
    (license license:artistic2.0)))
7455
;; GenomeInfoDbData: lookup tables between NCBI taxonomy IDs and species.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c"))))
    (build-system r-build-system)
    ;; The upstream tarball uses the mixed-case name.
    (properties '((upstream-name . "GenomeInfoDbData")))
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description
     "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (license license:artistic2.0)))
7478
;; GenomeInfoDb: translation between chromosome naming conventions.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32
         "07zljs2mfi8rf31g903f43v2f7767xbnflfrx9qjnmgf7bm039x0"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "GenomeInfoDb")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (license license:artistic2.0)))
7506
;; edgeR: differential expression analysis of RNA-seq and other count data.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.28.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "07dv99kl7jfa62nzcq705r56gcpb1hq7p4px48j71y2ddi1rqmr6"))))
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The last sentence previously read "It be applied to ...".
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can also be applied to differential signal
analysis of other types of genomic data that produce counts, including
ChIP-seq, SAGE and CAGE.")
    (license license:gpl2+)))
7534
;; VariantAnnotation: annotation of genetic variants.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32
         "009s7rzp78s2w6iybizina42qx2w8qv3xwjbkpqphmm451maykgs"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "VariantAnnotation")))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (license license:artistic2.0)))
7573
;; limma: linear models for microarray and RNA-seq data.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.42.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32
         "1nd01r7rd7jb5qz84vbgfnyrmgm9wiq7fsdji68537kjgvrzmm9z"))))
    (build-system r-build-system)
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (home-page "http://bioinf.wehi.edu.au/limma")
    (license license:gpl2+)))
7592
;; XVector: S4 classes for sequences stored behind an R external pointer
;; or on disk.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "0s2gg84yzl5ffkzp6n7kh0jjk1vd90z189f5hkfn18rn67cy2nv7"))))
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata;
         ;; zlib itself is provided through the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    ;; Fixed spelling: the synopsis previously read "manpulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7628
;; GenomicRanges: containers for genomic intervals and variables defined
;; along a genome.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.38.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "0xdds6ws7jjdfy4x3mb0qhy43kpxdmifmpw0jdk4wgw999zabb50"))))
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The first sentence was reworded; it used to run two clauses together
    ;; ("provides tools to ... is playing a central role ...").
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, a task that plays a central role when it
comes to analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7657
;; Biobase: base functions shared by many Bioconductor packages.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32
         "1gx41083dqlm59vwqdxvc4ny31x91j48mda9n3scg0f2zwasvqgl"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the capitalized name.
    (properties '((upstream-name . "Biobase")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "Base functions for Bioconductor")
    (description "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (home-page "https://bioconductor.org/packages/Biobase")
    (license license:artistic2.0)))
7679
;; AnnotationDbi: interface to SQLite-backed annotation data packages.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32
         "09piz1f0xpbb4amskx4ilby6lfrn27hhwk75il2c4lq6m3hr4w8s"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "AnnotationDbi")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Annotation database interface")
    (description "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (license license:artistic2.0)))
7706
;; biomaRt: interface to databases implementing the BioMart software suite.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "0difh4dsccjzhpfkvajy2adh98ym9164gd6clnsnic6qr6sk86ss"))))
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-httr" ,r-httr)
       ("r-openssl" ,r-openssl)
       ("r-progress" ,r-progress)
       ("r-rappdirs" ,r-rappdirs)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second;
    ;; the two arguments used to be swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7741
;; BiocParallel: parallel evaluation facilities for Bioconductor objects.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.20.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32
         "0g0znb4whsvb9hpwx9xaasdi5n4vjqw8cpdyqgrdrjm91rls1h21"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "BiocParallel")))
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (license (list license:gpl2+ license:gpl3+))))
7766
;; Biostrings: string containers and matching algorithms for biological
;; sequences.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32
         "0pq7g2hflx2cjlpwdj6mscw9hnxvlf5y50dxf48lbrf9r3q9kmyp"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the capitalized name.
    (properties '((upstream-name . "Biostrings")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "String objects and algorithms for biological sequences")
    (description "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (home-page "https://bioconductor.org/packages/Biostrings")
    (license license:artistic2.0)))
7792
;; Rsamtools: R interface to samtools, bcftools and tabix.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "2.2.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32
         "1vj43acawqqkf9yval9fzxarpsf04fmn78m2hq4f083w1k0myhyr"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the capitalized name.
    (properties '((upstream-name . "Rsamtools")))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Strip the zlibbioc references from the package metadata; zlib
         ;; itself is provided through the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rhtslib" ,r-rhtslib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description "This package provides an interface to the @code{samtools},
@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence
Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed
tab-delimited (tabix) files.")
    (home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (license license:expat)))
7837
;; DelayedArray: delayed operations on array-like objects.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.12.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32
         "09lackgix5jpm16k0mz2zkibflfb4wzidbz4q32mlxmklf40037q"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "DelayedArray")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)))
    (synopsis "Delayed operations on array-like objects")
    (description "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (license license:artistic2.0)))
7869
;; SummarizedExperiment: container for assays indexed by genomic ranges
;; (rows) and samples (columns).
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32
         "1z9bdk49dajafkfvv99nv6zyn6v70iyyy2jgdp5w5z8174a2bnn1"))))
    (build-system r-build-system)
    ;; The Bioconductor tarball uses the mixed-case name.
    (properties '((upstream-name . "SummarizedExperiment")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Container for representing genomic ranges by sample")
    (description "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (license license:artistic2.0)))
7900
;; Bioconductor package "GenomicAlignments", built with the R build system.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.22.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "065xvy4pkda0ajvl1b75iski95k1pnbhxwdq7vkfl8v55915vqh6"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
7933
;; Bioconductor package "rtracklayer".  A custom phase strips the zlibbioc
;; references from the package metadata; zlib is supplied as an input.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "0lp9xsm8kqrgpwgwj7qaqcv1z6qynbz084grlpwp94zsp2ppf0n6"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the zlibbioc entry from DESCRIPTION and its import from
         ;; NAMESPACE right after unpacking.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
7980
;; Bioconductor package "GenomicFeatures", built with the R build system.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.38.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "0dd226kgks50jdx5w35f3wmg95hy8aibi4kcn8p5kmqp5i8j580b"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
8021
;; Bioconductor annotation package "GO.db".  The tarball URL is assembled by
;; hand because it points into the "data/annotation" area of the site.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "0i3wcf5h3n0dawzc1hy0kv74f06j80c47n4p3g3fmrcxlhi3jpa5"))))
    ;; The upstream name contains a dot and capitals, unlike the Guix name.
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
8045
;; Bioconductor package "topGO", built with the R build system.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.38.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "1kw9m2j67895k58lx9msc248pjwblp8clxwgsl01cql7sgi1xzlf"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Upstream allows any version of the LGPL.
    (license license:lgpl2.1+)))
8078
;; Bioconductor package "BSgenome", built with the R build system.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "0nn1b3h4hmnx5whf2cmzmyxdrcf4myj8c38rwr0sw4rc07xfzndy"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
8108
;; Bioconductor package "impute"; gfortran is provided as a native input.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.60.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "0igz1phjd1j9bg9z4kyy7j8v9bxi9sdwz4df26r51i2vavlbrf4q"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
8128
;; Bioconductor package "seqPattern", built with the R build system.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "1gxrq6s2hiyac69idh5r1nbr1s69n0hg4ap2skm4g6857av9pwqf"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
8155
;; Bioconductor package "genomation", built with the R build system.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1sba928h23b67gr3i4yj1bg655g24l3bcgsf5gvymzrv5idrss1l"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
8201
;; Bioconductor experiment-data package "genomationData"; substitutes are
;; disabled for it.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/experiment/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/experiment/src/contrib/"
                           "genomationData_" version ".tar.gz"))
       (sha256
        (base32 "10xyb8akjrhmak2i0mnv1agny2ipy364q9nlibyplpzc7vdb6bw7"))))
    (build-system r-build-system)
    ;; This package provides little more than large data files, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8229
;; Bioconductor package "seqLogo", built with the R build system.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqLogo" version))
       (sha256
        (base32 "0s94aahp8ma1crmp83dz65ifjwrx6wqi3q6005lmbp8yk2x1rkj4"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8250
;; Bioconductor package "motifRG", built with the R build system.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32 "0s6wdr036lra9x93r9k8wvicbkgzypjh3jp46h92yacw8d829k0d"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8277
;; R/qtl, fetched from the CRAN mirror and built with the R build system.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.45-11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://cran/src/contrib/qtl_"
                           version ".tar.gz"))
       (sha256
        (base32 "1d6qgj602fm6zia3djl4hmca0ri4v57ffp3g93p2yc3cabx2hq90"))))
    (build-system r-build-system)
    (home-page "https://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description
     "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
8302
;; R/qtl2, taken from the upstream Git repository at the tag that equals the
;; package version.
(define-public r-qtl2
  (package
    (name "r-qtl2")
    (version "0.20")
    (source
     (origin
       (method git-fetch)
       ;; Fetched from Git because it is not yet available in CRAN.
       (uri (git-reference
             (url "https://github.com/rqtl/qtl2.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0l1asr28q25jzbwrbg5490962sg3y4sjrd0qf09p78ws1aq8vfs0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-jsonlite" ,r-jsonlite)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rsqlite" ,r-rsqlite)
       ("r-yaml" ,r-yaml)))
    (home-page "https://kbroman.org/qtl2/")
    (synopsis
     "QTL analysis software for high-dimensional data and complex cross designs")
    (description
     "R/qtl2 (aka qtl2) is a reimplementation of the QTL analysis software
R/qtl, to better handle high-dimensional data and complex cross designs.")
    (license license:gpl3)))
8331
;; Bioconductor package "zlibbioc", built with the R build system.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "1xh7qan0w62mzsmanbx9vcj6ygdfhzw1abaxijkq7f4nh5w87idj"))))
    (properties '((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description
     "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8350
;; R4RNA, downloaded from the e-rna.org site and built with the R build
;; system.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32 "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8373
;; Bioconductor package "Rhtslib", installed with staged installation turned
;; off.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.18.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rhtslib" version))
       (sha256
        (base32 "0gkbrmrcg55c9s5166ifljlx0v25rv4ijdyp4wf4c292xd6chy2l"))))
    ;; The upstream name is capitalized, unlike the Guix package name.
    (properties '((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    ;; With a staged install a temporary directory ends up in the Rhtslib.so
    ;; binary, which makes R abort the build.
    (arguments '(#:configure-flags '("--no-staged-install")))
    (propagated-inputs
     `(("curl" ,curl)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8404
;; Bioconductor package "bamsignals", built with the R build system.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "bamsignals" version))
       (sha256
        (base32 "0699b0pqbs0dvs91yjibcjc90lxj9mg8rcml4a6wchfr9md7n74w"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8434
;; Bioconductor package "RCAS".  Besides R packages, pandoc is listed among
;; the propagated inputs.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32 "1s3gvvxi1029d1vfwnjh21nnw3mlx08kcwz63891hml9y850cvsn"))))
    ;; The upstream name is upper-case, unlike the Guix package name.
    (properties '((upstream-name . "RCAS")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-knitr" ,r-knitr)
       ("r-motifrg" ,r-motifrg)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("r-topgo" ,r-topgo)
       ("pandoc" ,ghc-pandoc)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8484
;; Web front end for RCAS, built with the GNU build system.  Two extra
;; phases patch the configure script's RCAS version check and wrap the
;; installed executable with the Guile and R search paths.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                           "releases/download/v" version
                           "/rcas-web-" version ".tar.gz"))
       (sha256
        (base32 "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'find-RCAS
           ;; The configure script fails to detect RCAS versions outside the
           ;; 1.3.x series because its R expression ‘1.10.1 >= 1.3.4’
           ;; evaluates to false; lower the required version to work around
           ;; that.
           (lambda _
             (substitute* "configure"
               (("1\\.3\\.4") "0.0.0"))
             #t))
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((output    (assoc-ref outputs "out"))
                    (json-dir  (assoc-ref inputs "guile-json"))
                    (redis-dir (assoc-ref inputs "guile-redis"))
                    ;; Guile 2.2 site directories of both Guile libraries.
                    (site-path (string-append
                                json-dir "/share/guile/site/2.2:"
                                redis-dir "/share/guile/site/2.2")))
               ;; NOTE(review): GUILE_LOAD_COMPILED_PATH receives the same
               ;; directories as GUILE_LOAD_PATH; confirm that the compiled
               ;; modules of these inputs are really installed there.
               (wrap-program (string-append output "/bin/rcas-web")
                 `("GUILE_LOAD_PATH" ":" = (,site-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,site-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile-json-1)
       ("guile-redis" ,guile-redis)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description
     "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8535
;; Bioconductor package "MutationalPatterns", built with the R build system.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MutationalPatterns" version))
       (sha256
        (base32 "08715l6swrlccviw7932v5hyrd2x4c6049vy9qnxk0lw3sp1zvsf"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The next two BSgenome data packages are only suggested by upstream.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description
     "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8573
;; ChIPKernels, pinned to a specific upstream Git commit.  The version string
;; combines the base version "1.1", the Guix revision and the first nine
;; characters of the commit hash.
(define-public r-chipkernels
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels.git")
               (commit commit)))
         ;; Name the checkout with `git-file-name', like the other git-fetch
         ;; origins in this file, instead of a hand-built NAME-VERSION string.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8609
;; SeqGL, taken from the upstream Git repository at the tag that equals the
;; package version.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ManuSetty/SeqGL.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description
     "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8640
;; Bioconductor package "tximport", built with the R build system.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32 "09r23n2812q89by0r0cz2fx1gfnmn3jb3hwbg61m52bika82pakj"))))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8662
;; Bioconductor package "rhdf5", built with the R build system; zlib is
;; provided as an input.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rhdf5" version))
       (sha256
        (base32 "18pv74jj4wr1981r92ss10qkgf5g1b09dsbz3im3j70a4l5l0df0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8689
;; Bioconductor package "AnnotationFilter", built with the R build system.
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32 "1l9sxhlvnwn6327vgg02h11ppmqr2zr07ff8wmcng0i1jbqwa8q5"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8713
;; EMBOSS, built with the GNU build system.  The build patches configure.in
;; and Makefile.am after unpacking and then regenerates the build files with
;; autoreconf.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(;; Point configure at the libharu input.
       #:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER checks look for libgd, libpng and zlib but
             ;; assume that all three live under one prefix.  Replace the
             ;; check with unconditional settings.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; There is no Internet connection at build time, so looking
             ;; for updates cannot work.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate the build system after the edits above.
         (add-after 'disable-update-check 'autogen
           (lambda _ (invoke "autoreconf" "-vif") #t)))))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description
     "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8776
;; BITS, built from a pinned git commit because no release archive exists.
(define-public bits
  (let ((commit "3cc4567896d9d6442923da944beb704750a08d2d")
        (revision "1"))
    (package
      (name "bits")
      ;; Upstream calls this 2.13.0, although no release archive has been
      ;; published for it so far.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; No configure step is run for this package.
           (delete 'configure)
           ;; Strip every Makefile line mentioning a "_cuda" target and the
           ;; line continuation that followed bits_test_intersections.
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           ;; Install by copying the "bin" directory into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (copy-recursively "bin" bin)
                 #t))))
         #:tests? #f))                  ;no tests included
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8822
(define-public piranha
  ;; The newest upstream version has no release tarball, so a git commit is
  ;; pinned.  When this was written that commit was already over a year old.
  (let ((commit "0466d364b71117d01e4471b74c514436cc281233")
        (revision "1"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("python" ,python-2)))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Checkout of smithlab_cpp whose files are copied into the source
         ;; tree by the 'copy-smithlab-cpp phase.
         ("smithlab-cpp"
          ,(let ((smithlab-commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit smithlab-commit)))
               (file-name (git-file-name "smithlab_cpp" smithlab-commit))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (arguments
       `(#:configure-flags
         ;; Both flags point into the bamtools input.
         (let ((bamtools (assoc-ref %build-inputs "bamtools")))
           (list (string-append "--with-bam_tools_headers="
                                bamtools "/include/bamtools")
                 (string-append "--with-bam_tools_library="
                                bamtools "/lib/bamtools")))
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the smithlab-cpp input into
           ;; ./src/smithlab_cpp/.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((smithlab-cpp (assoc-ref inputs "smithlab-cpp")))
                 (for-each (lambda (source-file)
                             (install-file source-file "./src/smithlab_cpp/"))
                           (find-files smithlab-cpp))
                 #t)))
           ;; After 'install, copy everything found under "bin" into the
           ;; output's bin directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           (find-files "bin" ".*"))
                 #t))))))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
8889
;; PePr, fetched from PyPI and built with Python 2.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "PePr" version))
       (sha256
        (base32 "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (arguments
     `(#:tests? #f                      ; no tests included
       #:python ,python-2))             ; python2 only
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
8917
;; filevercmp, built from a pinned git commit.  There is no configure script
;; and no test suite; the single "filevercmp" binary is installed by hand.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/filevercmp.git")
                      (commit commit)))
                ;; Name the checkout from NAME and VERSION rather than from
                ;; the full commit string, so the source's store name matches
                ;; the package version.  The content hash is unaffected.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Copy the built "filevercmp" executable into the output's bin
           ;; directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
8948
;; MultiQC, fetched from PyPI.  The matplotlib version constraint in setup.py
;; is dropped before building.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (sha256
        (base32
         "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; MultiQC 1.5 ‘requires’ a version of python-matplotlib older
               ;; than the one in Guix, but should work fine with 2.2.2.
               ;; See <https://github.com/ewels/MultiQC/issues/725> and
               ;; <https://github.com/ewels/MultiQC/issues/732> for details.
               ;;
               ;; POSIX regexps have no lazy quantifier, so ".*?" would match
               ;; greedily up to the last quote on the line.  Use a negated
               ;; character class to stop at the requirement's own closing
               ;; quote.
               (("['\"]matplotlib[^'\"]*['\"]")
                "'matplotlib'"))
             #t)))))
    (home-page "https://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
8994
;; Bioconductor package "chipseq".
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "1ln6bn08xig3j6ryak1xfkjhvpnlm2vf1czz9hlj6f02299nbs6l"))))
    (build-system r-build-system)
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9020
;; Bioconductor experiment-data package "CopyhelpeR", fetched from the
;; release data/experiment area.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://bioconductor.org/packages/release/"
                                  "data/experiment/src/contrib/CopyhelpeR_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (build-system r-build-system)
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "CopyhelpeR")))
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9046
;; Bioconductor package "CopywriteR".
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "0llg1zpxg7qnvja5f5w1z1xic0jdg6zc4mfn97h2sm44skxxcyl1"))))
    (build-system r-build-system)
    (properties '((upstream-name . "CopywriteR")))
    ;; R packages made available alongside this one; r-copyhelper supplies
    ;; the helper data files.
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9086
;; Bioconductor package "methylKit".
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32 "0klwc0sbmrxj1lxbz16pl39rxjm0pi57gjw547hlgnac1p9fspzy"))))
    (build-system r-build-system)
    (properties '((upstream-name . "methylKit")))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-mgcv" ,r-mgcv)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9135
;; Bioconductor package "sva".
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "sva" version))
              (sha256
               (base32
                "1bzms6idx30s4nxl610zwa8rjxsyxb5pf3vxsdfmxg8j4pab9gh1"))))
    (build-system r-build-system)
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9165
;; CRAN package "seqminer"; zlib is its only declared input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "8.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqminer" version))
              (sha256
               (base32
                "00jzj8mwb0zaiwlifd41b26mrq9mzigj18nc29dydi0r42hxg16i"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Upstream accepts any version of the GPL.
    (license (list license:gpl2+ license:gpl3+))))
9187
;; R package "RareMETALS2", fetched from the genome.sph.umich.edu wiki.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                  "b/b7/RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (build-system r-build-system)
    (properties '((upstream-name . "RareMETALS2")))
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9219
;; CRAN package "MALDIquant"; no inputs are declared.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.19.3")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "MALDIquant" version))
              (sha256
               (base32
                "0b7kdz3x4sdq413h1q09l1qhcvdnnwv6fqsqwllks1cd3xy34c57"))))
    (build-system r-build-system)
    (properties '((upstream-name . "MALDIquant")))
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
9245
;; Bioconductor package "ProtGenerics"; no inputs are declared.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "1k1ggjgx2la8b21841a4ngkp6xfxwz0czv7x960r7i1jqif8y48z"))))
    (build-system r-build-system)
    (properties '((upstream-name . "ProtGenerics")))
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9265
;; Bioconductor package "mzR".  The bundled boost sources are deleted from the
;; tarball and the build links against a packaged boost instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzR" version))
              (sha256
               (base32
                "1cwd7phlc5jbx6r6cznyfbdpvcin5fvsaasbbi65zn0s92a80r13"))
              (modules '((guix build utils)))
              ;; Remove the bundled copy of boost.
              (snippet
               '(begin
                  (delete-file-recursively "src/boost")
                  #t))))
    (build-system r-build-system)
    (properties '((upstream-name . "mzR")))
    (inputs
     `(;; Our default boost package won't work here, unfortunately, even with
       ;; mzR version 2.20.0.
       ("boost" ,boost-for-mysql) ; use this instead of the bundled boost sources
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-ncdf4" ,r-ncdf4)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Drop the Makevars references to ./boost/libs and add an
         ;; ARCH_LIBS line naming the boost libraries to link.
         (add-after 'unpack 'use-system-boost
           (lambda _
             (substitute* "src/Makevars"
               (("\\./boost/libs.*") "")
               (("ARCH_OBJS=" line)
                (string-append
                 line
                 "\nARCH_LIBS=-lboost_system -lboost_regex "
                 "-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9319
;; Bioconductor package "affyio".
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "0sbkadxdlx7qzxc8z8iv90y6j9b2f62mk3i54dijjh56x3hjy3hb"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9343
;; Bioconductor package "affy".
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.64.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "131za66wbaz9y86gvjqcc2yd1f2ngl2b796xw726g75djhdgxgap"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9371
;; Bioconductor package "vsn".
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "vsn" version))
              (sha256
               (base32
                "1naqzb2m0km8fzr6chf9z71sisrwviy1fdi9b3hn4i8p18b4kqzh"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9407
;; Bioconductor package "mzID".
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzID" version))
              (sha256
               (base32
                "1glcv096bn6pxlw89dlij1nzpwnjvrbxysvw2gm6qgm7rhxlaxrw"))))
    (build-system r-build-system)
    (properties '((upstream-name . "mzID")))
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9437
;; Bioconductor package "pcaMethods".
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.78.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pcaMethods" version))
              (sha256
               (base32
                "1wir67kfjm0m9gf0ki8qmvh45n4gx2k0wfl9pd1hp4g62fbrj1pj"))))
    (build-system r-build-system)
    (properties '((upstream-name . "pcaMethods")))
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9467
;; Bioconductor package "MSnbase".
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnbase" version))
              (sha256
               (base32
                "1z889xkfphqqmv31i8hh5xqyclv660ic26rfck5bjpgk3s2zzwi6"))))
    (build-system r-build-system)
    (properties '((upstream-name . "MSnbase")))
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9510
;; Bioconductor package "MSnID".
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnID" version))
              (sha256
               (base32
                "0m71f2y12hmwvng45kzz4r4qrgc2jbd7j9gprmw8y5laawpdaifg"))))
    (build-system r-build-system)
    (properties '((upstream-name . "MSnID")))
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimize filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9549
;; CRAN package "Seurat".
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "Seurat" version))
       (sha256
        (base32 "0lhjbjhv1hnx5i3gkx41k68i8ykay3f24708h30wx9xywww9lsvi"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Seurat")))
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-future" ,r-future)
       ("r-future-apply" ,r-future-apply)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-ggridges" ,r-ggridges)
       ("r-httr" ,r-httr)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-leiden" ,r-leiden)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-metap" ,r-metap)
       ("r-patchwork" ,r-patchwork)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppannoy" ,r-rcppannoy)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reticulate" ,r-reticulate)
       ("r-rlang" ,r-rlang)
       ("r-rocr" ,r-rocr)
       ("r-rsvd" ,r-rsvd)
       ("r-rtsne" ,r-rtsne)
       ("r-scales" ,r-scales)
       ("r-sctransform" ,r-sctransform)
       ("r-tsne" ,r-tsne)
       ("r-uwot" ,r-uwot)))
    (home-page "http://www.satijalab.org/seurat")
    (synopsis "Seurat is an R toolkit for single cell genomics")
    (description
     "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
9611
;; Bioconductor package "aroma.light".
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "aroma.light" version))
              (sha256
               (base32
                "0cgdg650j4dl0b45pwaw49ib97dwjazrv9sqzkygrjmcnnfxry8x"))))
    (build-system r-build-system)
    (properties '((upstream-name . "aroma.light")))
    ;; R packages made available alongside this one.
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9638
;; Bioconductor package "DESeq", exposed to Guix as r-deseq.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq" version))
       (sha256
        (base32 "14pys93gsl50xmq5pc7pp1g20v3ywlg0yzkkhwb3kiy8573xn9nc"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www-huber.embl.de/users/anders/DESeq/")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9669
;; Bioconductor package "EDASeq", exposed to Guix as r-edaseq.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "EDASeq" version))
       (sha256
        (base32
         "19mgzbv8yxgvw86wpq401l27q55ygawlngl775yavwccz1zbhjnj"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    ;; The description is written as complete sentences; the previous text
    ;; contained sentence fragments ("Provided within-lane ...").
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. It provides within-lane normalization procedures
to adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. It also provides between-lane normalization procedures to
adjust for distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9708
;; Bioconductor package "interactiveDisplayBase", exposed to Guix as
;; r-interactivedisplaybase.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "interactiveDisplayBase" version))
       (sha256
        (base32 "0zwf3ma6wf4zypl6bgjp0n72k2hmp0g16gzl4v3y4157rxcbpl0n"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9732
;; Bioconductor package "AnnotationHub", exposed to Guix as r-annotationhub.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationHub" version))
       (sha256
        (base32 "19vj3bk8jz68q84g3j8xs1s9bqz90lbwbciig1h45zvn2zc6087m"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocversion" ,r-biocversion)
       ("r-curl" ,r-curl)
       ("r-dplyr" ,r-dplyr)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rappdirs" ,r-rappdirs)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9771
;; Bioconductor package "fastseg", exposed to Guix as r-fastseg.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "fastseg" version))
       (sha256
        (base32 "1cys6frmbizc8bf933mwvvnr31sfya9ahcc0wm66pbd1x3mygkmk"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9801
;; Bioconductor package "KEGGREST", exposed to Guix as r-keggrest.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "1cgjvv9n88y3ah21356mh8z2l08vjn42hjy8hcljsibknzc4v247"))))
    ;; Upstream uses upper case; the Guix name is lower case.
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; Avoid the redundant "provides a package that provides" wording.
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
9825
;; Bioconductor package "gage", exposed to Guix as r-gage.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32 "1qxfmg0id19iy3ia8h5nrvk3d1azqb28kl7m08i23654wb6b45c6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    ;; The URL is assembled from two pieces only to keep source lines short.
    (home-page
     (string-append "https://bmcbioinformatics.biomedcentral.com/"
                    "articles/10.1186/1471-2105-10-161"))
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
9855
;; Bioconductor package "GenomicFiles", exposed to Guix as r-genomicfiles.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFiles" version))
       (sha256
        (base32 "1x6q827ms2l5lwzha1vsgfrshh35n9f19jq57xagrqlafxgpz86s"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
9888
;; Bioconductor package "ComplexHeatmap", exposed to Guix as
;; r-complexheatmap.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "2.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ComplexHeatmap" version))
       (sha256
        (base32 "1pj6a6rmqckk033pkklk6hr4066rzavamy6w194rfdhind90rk0p"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-clue" ,r-clue)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-png" ,r-png)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
9920
;; Bioconductor package "DirichletMultinomial", exposed to Guix as
;; r-dirichletmultinomial.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DirichletMultinomial" version))
       (sha256
        (base32 "0knmncmkkf2ypyqfcl5s8nmyyf9nrzkqprzn9w3w8182c0v49r0s"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    ;; gsl is the only plain (non-propagated) input.
    (inputs `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
9949
;; Bioconductor package "ensembldb", exposed to Guix as r-ensembldb.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.10.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32
         "02lnpyp85zchmz404hr5381zmihvq4x9zgxdrbn2afi352vg0vab"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; Grammar fixes relative to the previous text: "annotation ... is" and
    ;; "in addition to retrieving".
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotation for the databases is directly
fetched from Ensembl using their Perl API. The functionality and data is
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
allowing to retrieve annotations for specific entries like genes encoded on a
chromosome region or transcript models of lincRNA genes.")
    ;; No license version is specified upstream.
    (license license:lgpl3+)))
9992
;; Bioconductor package "OrganismDbi", exposed to Guix as r-organismdbi.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32
         "1bvfyh733mhka9zd00hrzpalgjs255c2blnxyf60ipzk5jg7yllb"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; Number agreement fixed: "a select method" (was "a select methods").
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10024
;; Bioconductor package "biovizBase", exposed to Guix as r-biovizbase.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.34.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biovizBase" version))
       (sha256
        (base32 "04vvj907bgs67w8rb7n1haf80p6cd0qj5fdxw0dwryb455y35vir"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10066
;; Bioconductor package "ggbio", exposed to Guix as r-ggbio.  The build adds
;; one extra phase that corrects a misspelled word in the R sources.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ggbio" version))
       (sha256
        (base32 "13wzwh40anh8l53yp19bg4w5cpxykcaf228dc8cxvjndyib711qb"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Upstream report: https://github.com/tengfei/ggbio/issues/117
         ;; The next release is expected to ship this correction.
         (add-after 'unpack 'fix-typo
           (lambda _
             ;; Replace the misspelling "fechable" in R/GGbio-class.R.
             (substitute* "R/GGbio-class.R"
               (("fechable") "fetchable"))
             #t)))))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10129
;; CRAN package "gProfileR", exposed to Guix as r-gprofiler.
(define-public r-gprofiler
  (package
    (name "r-gprofiler")
    (version "0.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "gProfileR" version))
       (sha256
        (base32 "1h1v0kgpsn04ald2izznh7fr2riwisj5hcgz4k7h3qc931rf0r4k"))))
    ;; Upstream uses mixed case and a trailing "R"; the Guix name differs.
    (properties '((upstream-name . "gProfileR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-plyr" ,r-plyr)
       ("r-rcurl" ,r-rcurl)))
    (home-page "https://cran.r-project.org/web/packages/gProfileR/")
    (synopsis "Interface to the g:Profiler toolkit")
    (description
     "This package provides tools for functional enrichment analysis,
gene identifier conversion and mapping homologous genes across related
organisms via the @code{g:Profiler} toolkit.")
    (license license:gpl2+)))
10153
;; Bioconductor package "gQTLBase", exposed to Guix as r-gqtlbase.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gQTLBase" version))
       (sha256
        (base32 "1qr8dqjbmj1mdjbzbnxwzfrm8f02wqfsgic8ws5kv7pmsby63y4x"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10188
;; Bioconductor package "snpStats", exposed to Guix as r-snpstats.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "snpStats" version))
       (sha256
        (base32 "1xq1rjljg70h5mshdza56dis0iv1a20sivs6dav3w5jbdd1l5qkh"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "snpStats")))
    (build-system r-build-system)
    ;; zlib is the only plain (non-propagated) input.
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10215
;; Bioconductor annotation package "Homo.sapiens", exposed to Guix as
;; r-homo-sapiens.
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so the URL is spelled out here instead of using bioconductor-uri.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "Homo.sapiens_"
                           version ".tar.gz"))
       (sha256
        (base32 "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    ;; Upstream spells the name differently from the Guix package name.
    (properties '((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene"
        ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10247
;; Bioconductor package "erma", exposed to Guix as r-erma.
(define-public r-erma
  (package
    (name "r-erma")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "erma" version))
       (sha256
        (base32 "085qsr73p8nyp435f15l4l1jkfd64bfd9gl4z496nfxdnqn95srz"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10285
;; Bioconductor package "ldblock", exposed to Guix as r-ldblock.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ldblock" version))
       (sha256
        (base32 "0xpigfidmylfawy6vzshqnsw1lzjs4qms8q7zffij6bkvkv7920x"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-ensdb-hsapiens-v75" ,r-ensdb-hsapiens-v75)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-httr" ,r-httr)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis
     "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10317
;; Bioconductor package "gQTLstats", exposed to Guix as r-gqtlstats.
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gQTLstats" version))
       (sha256
        (base32 "1dly4p9r4231hf31xg1nzqiyvjbcfjljfmhb88ic1jxwnvniyv2f"))))
    ;; Upstream uses mixed case; the Guix name is lower case.
    (properties '((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10371
;; Bioconductor package "Gviz", exposed to Guix as r-gviz.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.30.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Gviz" version))
       (sha256
        (base32
         "0c9i26h5czm60n1bxzmdxxpywcj0sig6wcj913pb41mr83bbgra3"))))
    ;; Upstream capitalizes the name; the Guix name is lower case.
    (properties `((upstream-name . "Gviz")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    ;; Subject/verb agreement fixed: "analysis requires" (was "analyses
    ;; requires").
    (description
     "Genomic data analysis requires integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10417
;; Bioconductor package "gwascat", exposed to Guix as r-gwascat.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gwascat" version))
       (sha256
        (base32 "038vhfsk2vs7inn5di093cmjbb81k7j0af385sg7l01jj70bdqq1"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10448
;; Bioconductor package "Sushi", exposed to Guix as r-sushi.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32 "15xng21hd09fb234ravrry3b872zg82w8x9lijxab9n96xihcpz5"))))
    ;; Upstream capitalizes the name; the Guix name is lower case.
    (properties '((upstream-name . "Sushi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10470
;; Bioconductor package "FitHiC"; `upstream-name' keeps the mixed-case
;; upstream spelling.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32
         "1irwkwi4afdj395134k31mvx7c2vpdd0rv8zrblnldascdsb04kc"))))
    (properties '((upstream-name . "FitHiC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10494
;; Bioconductor package "HiTC"; `upstream-name' keeps the mixed-case
;; upstream spelling.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32
         "0byahi0fz0dzjyklz8v9whax9ygg7gwb4pl1j3zbl6z8a9qx8pps"))))
    (properties '((upstream-name . "HiTC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10523
;; Bioconductor package "HDF5Array".  zlib is a plain (non-propagated)
;; input; the R dependencies are propagated.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.14.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "HDF5Array" version))
              (sha256
               (base32
                "1z153a7nxmlml72pl1saasj2il9g5ahpynkpv3mkhhsvl5kbwbh6"))))
    (properties '((upstream-name . "HDF5Array")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description
     "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10553
;; Bioconductor package "Rhdf5lib".  The source snippet removes the bundled
;; Windows binaries, and the build is redirected to the hdf5-1.10 package:
;; its static libraries come from the "hdf5" input and its headers from an
;; unpacked copy of the "hdf5-source" input.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhdf5lib" version))
              (sha256
               (base32
                "17lhwnm9rqsvbqkvwp0m07vjrk63a4389p2y39zffv8fgznxqzd7"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled binaries
                  (delete-file-recursively "src/winlib/")
                  #t))))
    (properties '((upstream-name . "Rhdf5lib")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((hdf5-prefix (assoc-ref inputs "hdf5"))
                    ;; Produce the Makevars line "VARIABLE=<hdf5>/lib/ARCHIVE".
                    (library-line
                     (lambda (variable archive)
                       (string-append variable "=" hdf5-prefix
                                      "/lib/" archive "\n"))))
               ;; Drop the configure machinery of the bundled HDF5.
               (delete-file "configure")
               (delete-file "configure.ac")
               ;; Do not make other packages link with the proprietary libsz.
               (substitute* "R/zzz.R"
                 (("'\"%s/libhdf5.a\" \"%s/libsz.a\" -lz'")
                  "'\"%s/libhdf5.a\" \"%s/libhdf5.a\" -lz'")
                 (("'\"%s/libhdf5_cpp.a\" \"%s/libhdf5.a\" \"%s/libsz.a\" -lz'")
                  "'\"%s/libhdf5_cpp.a\" \"%s/libhdf5.a\" \"%s/libhdf5.a\" -lz'")
                 (("'%s/libhdf5_hl.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_hl.a %s/libhdf5.a %s/libhdf5.a -lz'")
                 (("'%s/libhdf5_hl_cpp.a %s/libhdf5_hl.a %s/libhdf5_cpp.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_hl_cpp.a %s/libhdf5_hl.a %s/libhdf5_cpp.a %s/libhdf5.a %s/libhdf5.a -lz'"))
               (with-directory-excursion "src"
                 ;; Unpack the HDF5 sources under the fixed name "hdf5".
                 (invoke "tar" "xvf" (assoc-ref inputs "hdf5-source"))
                 (rename-file
                  (string-append "hdf5-" ,(package-version hdf5-1.10))
                  "hdf5")
                 ;; Remove timestamp and host system information to make
                 ;; the build reproducible.
                 (substitute* "hdf5/src/libhdf5.settings.in"
                   (("Configured on: @CONFIG_DATE@")
                    "Configured on: Guix")
                   (("Uname information:.*")
                    "Uname information: Linux\n")
                   ;; Remove unnecessary store reference.
                   (("C Compiler:.*")
                    "C Compiler: GCC\n"))
                 ;; Use the template directly as the Makevars file, pointing
                 ;; libraries at the hdf5 input and headers at ./hdf5.
                 (rename-file "Makevars.in" "Makevars")
                 (substitute* "Makevars"
                   (("HDF5_CXX_LIB=.*")
                    (library-line "HDF5_CXX_LIB" "libhdf5_cpp.a"))
                   (("HDF5_LIB=.*")
                    (library-line "HDF5_LIB" "libhdf5.a"))
                   (("HDF5_CXX_INCLUDE=.*")
                    "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                   (("HDF5_INCLUDE=.*")
                    "HDF5_INCLUDE=./hdf5/src\n")
                   (("HDF5_HL_INCLUDE=.*")
                    "HDF5_HL_INCLUDE=./hdf5/hl/src\n")
                   (("HDF5_HL_CXX_INCLUDE=.*")
                    "HDF5_HL_CXX_INCLUDE=./hdf5/hl/c++/src\n")
                   (("HDF5_HL_LIB=.*")
                    (library-line "HDF5_HL_LIB" "libhdf5_hl.a"))
                   (("HDF5_HL_CXX_LIB=.*")
                    (library-line "HDF5_HL_CXX_LIB" "libhdf5_hl_cpp.a"))
                   ;; szip is non-free software
                   (("cp \"\\$\\{SZIP_LIB\\}.*") "")
                   (("PKG_LIBS =.*") "PKG_LIBS = -lz -lhdf5\n")))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("hdf5" ,hdf5-1.10)))
    (native-inputs
     `(("hdf5-source" ,(package-source hdf5-1.10))))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description
     "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10636
;; Bioconductor package "beachmat", built with the R build system.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "2.2.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "beachmat" version))
              (sha256
               (base32
                "1bpnlw2kdy9yc2vq948k980r0j25ipb80llhvn0j3kxjiwyfgs3i"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-matrix" ,r-matrix)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description
     "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10659
;; Bioconductor package "SingleCellExperiment"; `upstream-name' keeps the
;; mixed-case upstream spelling.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "SingleCellExperiment" version))
              (sha256
               (base32
                "11pqb3cigi9xbhxq2k3n7z23v1ibd03ws1lcrh5c5ffgb33nlyw5"))))
    (properties '((upstream-name . "SingleCellExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description
     "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10686
;; Bioconductor package "scater", built with the R build system.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.14.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scater" version))
       (sha256
        (base32
         "0sxd1s8wdlj9926bagq4crjrk1nnmh3j3bhgrw160zfgc3y8pzck"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description
     "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
10720
;; Bioconductor package "scran", built with the R build system.
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.14.6")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "scran" version))
              (sha256
               (base32
                "1y8wlgk5zbv7c7gcp0ahfpbh9lifab7y3zwf0093fzaw7vr1y6cr"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-bh" ,r-bh)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dqrng" ,r-dqrng)
       ("r-edger" ,r-edger)
       ("r-igraph" ,r-igraph)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description
     "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
10760
;; Bioconductor package "DelayedMatrixStats"; `upstream-name' keeps the
;; mixed-case upstream spelling.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DelayedMatrixStats" version))
              (sha256
               (base32
                "0mv2rl6a6l404piabcazxz1s6ars016pxhjf5v40hhr6y1r0wbqy"))))
    (properties '((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time is minimized.")
    (license license:expat)))
10794
;; CRAN package "phangorn" (located through `cran-uri'), built with the R
;; build system.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.5.5")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "phangorn" version))
              (sha256
               (base32
                "0ihkaykqjmf80d8wrk3saphxvnv58zma6pd13633bd3cwanc33f5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
10822
;; dropbead is built from a pinned Git commit; the version string is derived
;; from the base version "0", the packaging revision, and the commit.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      ;; `git-version' builds "0-<revision>.<first 7 characters of commit>".
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
10852
;; Module-private htslib variant: inherits from `htslib' but takes its source
;; from a pinned commit of the lomereiter fork.  autoconf and automake are
;; added on top of htslib's own native inputs.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/lomereiter/htslib.git")
                      (commit commit)))
                (file-name (string-append "htslib-" version "-checkout"))
                (sha256
                 (base32
                  "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
10873
;; Sambamba is compiled with ldc2.  The BioD and undeaD sources are copied
;; into the build tree, and the Makefile is edited to link against the lz4
;; and htslib inputs instead of the bundled static copies.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lomereiter/sambamba.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0k0cz3qcv98p6cq09zlbgnjsggxcqbcmzxg5zikgcgbr2nfq4lry"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             (substitute* "gen_ldc_version_info.py"
               ;; The pattern ".*" also consumes the line terminator, so
               ;; put it back to keep the interpreter line separate from
               ;; the line that follows it.
               (("/usr/bin/env.*")
                (string-append (which "python3") "\n")))
             (substitute* "Makefile"
               ;; We use ldc2 instead of ldmd2 to compile sambamba.
               (("\\$\\(shell which ldmd2\\)") (which "ldc2")))
             #t))
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Copy the two source checkouts to the directory names used
             ;; here ("BioD" and "undeaD").
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             (copy-recursively (assoc-ref inputs "undead") "undeaD")
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link dynamically against htslib and lz4 and drop the
             ;; targets that would build the bundled static libraries.
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" lz4-static htslib-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (install-file "bin/sambamba" bin)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python)
       ("biod"
        ,(let ((commit "4f1a7d2fb7ef3dfe962aa357d672f354ebfbe42e"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1k5pdjv1qvi0a3rwd1sfq6zbj37l86i7bf710m4c0y6737lxj426")))))
       ("undead"
        ,(let ((commit "9be93876982b5f14fcca60832563b3cd767dd84d"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/undeaD.git")
                   (commit commit)))
             (file-name (string-append "undead-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1xfarj0nqlmi5jd1vmcmm7pabzaf9hxyvk6hp0d6jslb5k9r8r3d")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "https://lomereiter.github.io/sambamba/")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
10963
;; Ritornello is built with plain `make' (no configure step) and the single
;; resulting binary is installed by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/Ritornello.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             ;; Include sam.h through the "samtools/" subdirectory.
             (substitute* '("src/SamStream.h"
                            "src/FLD.cpp")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin/")))
               (mkdir-p bin)
               (install-file "bin/Ritornello" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description
     "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
11008
;; Trim Galore is a single script: nothing is compiled, the script is patched
;; to use absolute paths to its helper tools and then copied to bin/.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.6.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/FelixKrueger/TrimGalore.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Look up each tool's prefix once, then splice the absolute
             ;; program paths into the script.
             (let ((cutadapt (assoc-ref inputs "cutadapt"))
                   (gzip (assoc-ref inputs "gzip"))
                   (pigz (assoc-ref inputs "pigz")))
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '"
                                 cutadapt "/bin/cutadapt'"))
                 (("\\$compression_path = \"gzip\"")
                  (string-append "$compression_path = \""
                                 gzip "/bin/gzip\""))
                 (("\"gunzip")
                  (string-append "\"" gzip "/bin/gunzip"))
                 (("\"pigz")
                  (string-append "\"" pigz "/bin/pigz")))
               #t)))
         (replace 'configure
           (lambda _
             ;; Trim Galore tries to figure out what version of Python
             ;; cutadapt is using by looking at the shebang. Of course that
             ;; doesn't work, because cutadapt is wrapped in a shell script.
             (substitute* "trim_galore"
               (("my \\$python_return.*")
                "my $python_return = \"Python 3.999\";\n"))
             #t))
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (install-file "trim_galore" bin)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("pigz" ,pigz)
       ("cutadapt" ,cutadapt)))
    ;; NOTE(review): the source is a Git checkout, so unzip looks unused
    ;; here -- confirm before removing.
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description
     "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
11078
;; GESS is a collection of Python 2 scripts.  Nothing is configured or
;; compiled: the sources are copied into a site-packages directory, GESS.py
;; gets a shebang and a PYTHONPATH wrapper, and it is linked into bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    ;; The "python" input is python-2, so the site-packages
                    ;; directory is named after that package's version.
                    (target (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-script (string-append target "GESS.py")
                 `("PYTHONPATH" ":" = (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)
       ("guile" ,guile-3.0)))           ; for the script wrapper
    (home-page "https://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
11143
;; PHYLIP is built from its "src" directory with Makefile.unx; every file
;; found under "../exe" afterwards is copied to bin/.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                                  "download/phylip-" version ".tar.gz"))
              (sha256
               (base32
                "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin")))
               (mkdir-p target)
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "../exe" ".*")))
             #t)))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description
     "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
11180
;; IMP is built with CMake against Python 2; its test suite is disabled (see
;; the FIXME below).
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://integrativemodeling.org/"
                                  version "/download/imp-" version ".tar.gz"))
              (sha256
               (base32
                "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     `(;; FIXME: Some tests fail because they produce warnings, others fail
       ;; because the PYTHONPATH does not include the modeller's directory.
       #:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    ;; Python 2 libraries propagated to users of this package.
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description
     "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license. Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
11224
;; TADbit is built with the Python build system using Python 2.  One extra
;; phase makes two files writable for setup.py and redirects the bash
;; completion file into the package output.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/3DGenomes/TADbit.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "07g3aj648prmsvxp9caz5yl41k0y0647vxh0f5p3w8376mfiljd0"))))
    (build-system python-build-system)
    (arguments
     `(;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completion-dir
                    (string-append "(\""
                                   (assoc-ref outputs "out")
                                   "/etc/bash_completion.d\"")))
               ;; setup.py opens these files for writing
               (chmod "_pytadbit/_version.py" #o664)
               (chmod "README.rst" #o664)

               ;; Don't attempt to install the bash completions to
               ;; the home directory.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  completion-dir)
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
11283
;; kentUtils: a selection of bioinformatics utilities used together with the
;; UCSC genome browser.  The origin snippet strips every directory under
;; "src" that is not on a short allow-list and trims the makefiles so that
;; only the remaining tools are built.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the directories listed in KEEP are free for all uses;
           ;; every other directory below "src" is removed.  "hg/autoSql"
           ;; and "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib", which is
           ;; nonfree.
           (let* ((keep '("." ".."
                          "utils" "lib" "inc" "tagStorm"
                          "parasol" "htslib"))
                  (rejected (scandir "src"
                                     (lambda (entry)
                                       (not (member entry keep))))))
             (for-each (lambda (path)
                         ;; Plain files directly under "src" are left alone.
                         (when (file-is-directory? path)
                           (delete-file-recursively path)))
                       (map (cut string-append "src/" <>) rejected)))
           ;; Build the "utils" target rather than "userApps", because the
           ;; latter requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Restrict the library build to the free libraries.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; Both of these tools depend on "jkhgap.a", which is part of
             ;; the nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         (add-after 'unpack 'fix-paths
           (lambda _
             ;; Replace the hard-coded /bin/echo with the echo found on PATH.
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Place the separately fetched samtabix checkout in the
             ;; source tree under "samtabix".
             (let ((checkout (assoc-ref inputs "samtabix")))
               (copy-recursively checkout "samtabix"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The build leaves its executables in "bin"; copy them over.
             (copy-recursively "bin"
                               (string-append (assoc-ref outputs "out")
                                              "/bin"))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb" ,mariadb "lib")
       ("mariadb-dev" ,mariadb "dev")
       ("openssl" ,openssl-1.0)))
    (home-page "https://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
11400
;; F-Seq, built with Ant from a pinned upstream commit.  Bundled jars are
;; deleted from the source and the launcher script is rewritten to point at
;; store paths.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      (version (git-version "1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin"))
                      (docdir (string-append out "/share/doc/f-seq"))
                      (libdir (string-append out "/lib"))
                      (commons-cli
                       (string-append (assoc-ref inputs "java-commons-cli")
                                      "/share/java/commons-cli.jar")))
                 (mkdir-p out)
                 (mkdir-p docdir)
                 ;; Patch the launcher: absolute java, the packaged
                 ;; commons-cli jar, and a fixed REALDIR pointing at the
                 ;; installed bin directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    commons-cli)
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bindir "\n")))
                 (install-file "README.txt" docdir)
                 (install-file "bin/linux/fseq" bindir)
                 (install-file "build~/fseq.jar" libdir)
                 (copy-recursively "lib" libdir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
11459
;; Bismark: a set of Perl scripts that are installed as-is (there is no
;; configure or build step).  The plotly.js file shipped in the source tree is
;; overwritten with the output of running uglify-js on the separately fetched
;; plotly.js input.
(define-public bismark
  (package
    (name "bismark")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:modules ((guix build utils)
                  (ice-9 popen)
                  (srfi srfi-26)
                  (guix build perl-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled plotly.js is minified.
         (add-after 'unpack 'replace-plotly.js
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((file (assoc-ref inputs "plotly.js"))
                    (installed "plotly/plotly.js")
                    (minified (open-pipe* OPEN_READ "uglify-js" file)))
               (call-with-output-file installed
                 (cut dump-port minified <>))
               ;; Close the pipe so the child is reaped, and abort the
               ;; build when uglify-js did not exit successfully; otherwise
               ;; an empty or truncated plotly.js would be installed
               ;; without any error.
               (unless (zero? (close-pipe minified))
                 (error "uglify-js failed to process" file)))
             #t))
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/bismark"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (docs '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary"
                               "NOMe_filtering")))
               ;; bismark2report looks for plotly next to itself; point it
               ;; at the share directory where plotly is installed below.
               (substitute* "bismark2report"
                 (("\\$RealBin/plotly")
                  (string-append share "/plotly")))
               (mkdir-p share)
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin))
                         scripts)
               (for-each (lambda (file) (install-file file docdir))
                         docs)
               (copy-recursively "Docs/Images" (string-append docdir "/Images"))
               (copy-recursively "plotly"
                                 (string-append share "/plotly"))

               ;; Fix references to gunzip
               (substitute* (map (lambda (file)
                                   (string-append bin "/" file))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl-carp" ,perl-carp)
       ("perl-getopt-long" ,perl-getopt-long)))
    (native-inputs
     `(("plotly.js"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
                               "v1.39.4/dist/plotly.js"))
           (sha256
            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
       ("uglify-js" ,uglify-js)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
11564
;; PAML: built with plain make from the "src" subdirectory.  Windows
;; executables shipped in the tarball are removed from the source, and the
;; listed tools plus the documentation are installed by hand.
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script; instead patch the hard-coded
         ;; shell path and enter "src" so that later phases run there.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               ;; The current directory is "src" (see 'configure), so the
               ;; documentation lives one level up.
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11612
;; kallisto: CMake build patched to link against the htslib input instead of
;; the copy bundled under "ext/htslib".
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.44.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pachterlab/kallisto.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0nj382jiywqnpgvyhichajpkkh5r0bapn43f4dx40zdaq5v4m40m"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Wrap the ExternalProject_Add(...) call in an
             ;; "if (NEVER) ... endif(NEVER)" pair and comment out the
             ;; include directory that refers to the htslib prefix.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" opening)
                (string-append "if (NEVER)\n" opening))
               (("^\\)")
                ")\nendif(NEVER)")
               (("include_directories\\(\\$\\{htslib_PREFIX.*" directive)
                (string-append "# " directive)))
             ;; Link against "hts" rather than the bundled static archive
             ;; and drop the bundled include directory.
             (substitute* "src/CMakeLists.txt"
               (("target_link_libraries\\(kallisto kallisto_core pthread \
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)") ""))
             #t)))))
    (inputs
     `(("hdf5" ,hdf5)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11661
;; libgff: GFF/GTF parsing library, built with CMake.  The test suite is
;; disabled because no tests are included.
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kingsford-Group/libgff.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6vfjnq7a2mianipscbshrvbncss8z4zkgkbjw754p9043nfkps"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f))                    ; no tests included
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
11684
;; Sailfish: CMake build patched so that Boost, Jellyfish, libdivsufsort,
;; Eigen and RapMap come from the package inputs rather than from bundled or
;; downloaded copies.
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/kingsfordgroup/sailfish.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               ;; The rapmap input is unpacked with tar into a scratch
               ;; directory; its sources and headers are then copied to
               ;; the locations the build expects.
               (mkdir-p "/tmp/rapmap")
               (invoke "tar" "xf"
                       rapmap
                       "-C" "/tmp/rapmap"
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
               (copy-recursively "/tmp/rapmap/include" include))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop the include of the bundled Jellyfish configuration
             ;; header.
             (substitute* '("src/SailfishIndexer.cpp"
                            "src/SailfishUtils.cpp"
                            "src/SailfishQuantify.cpp"
                            "src/FASTAParser.cpp"
                            "include/PCA.hpp"
                            "include/SailfishUtils.hpp"
                            "include/SailfishIndex.hpp"
                            "include/CollapsedEMOptimizer.hpp"
                            "src/CollapsedEMOptimizer.cpp")
               (("#include \"jellyfish/config.h\"") ""))
             ;; Point library and header references at the inputs.
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (string-append (assoc-ref inputs "jellyfish")
                               "/include/jellyfish-" ,(package-version jellyfish)))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (string-append (assoc-ref inputs "jellyfish")
                               "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so")))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("find_package\\(Jellyfish.*") "")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libsparsehash") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message("))

             ;; Ensure that Eigen headers can be found.  Extend an existing
             ;; CPLUS_INCLUDE_PATH instead of overwriting it, so that any
             ;; header search path already set is preserved.
             (let ((eigen-headers (string-append (assoc-ref inputs "eigen")
                                                 "/include/eigen3"))
                   (current (getenv "CPLUS_INCLUDE_PATH")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (if current
                           (string-append current ":" eigen-headers)
                           eigen-headers)))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
11833
;; Private (non-exported) build of the COMBINE-lab fork of staden-io_lib; it
;; is referenced by the salmon package as its "libstadenio-for-salmon" input.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-tests? #f))           ; not supported
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
11874
;; Salmon: CMake build patched so that nothing is downloaded and the
;; libraries come from the package inputs.  RapMap is copied into the source
;; tree from a separately fetched checkout.
(define-public salmon
  (package
    (name "salmon")
    (version "0.13.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/salmon.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1i2z4aivicmiixdz9bxalp7vmfzi3k92fxa63iqa8kgvfw5a4aq5"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             "-DTBB_LIBRARIES=tbb tbbmalloc"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.59\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             ;; Replace the getVersionMessage() call with an empty string.
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src)
               (mkdir-p include)
               (copy-recursively (string-append rapmap "/src") src)
               (copy-recursively (string-append rapmap "/include") include)
               ;; Remove these files and the spdlog directory from the
               ;; copied RapMap tree.
               (for-each delete-file '("external/install/include/rapmap/xxhash.h"
                                       "external/install/include/rapmap/FastxParser.hpp"
                                       "external/install/include/rapmap/concurrentqueue.h"
                                       "external/install/include/rapmap/FastxParserThreadUtils.hpp"
                                       "external/install/src/rapmap/FastxParser.cpp"
                                       "external/install/src/rapmap/xxhash.c"))
               (delete-file-recursively "external/install/include/rapmap/spdlog"))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               ;; Don't download anything
               (("DOWNLOAD_COMMAND") "DOWNLOAD_COMMAND echo")
               (("externalproject_add\\(libcereal") "message(")
               (("externalproject_add\\(libgff") "message(")
               (("externalproject_add\\(libtbb") "message(")
               (("externalproject_add\\(libdivsufsort") "message(")
               (("externalproject_add\\(libstadenio") "message(")
               (("externalproject_add_step\\(") "message("))
             (substitute* "src/CMakeLists.txt"
               (("add_dependencies") "#")
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                (string-append (assoc-ref inputs "libstadenio-for-salmon")
                               "/lib/libstaden-read.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so"))
               (("lib/libdivsufsort.a") "/lib/libdivsufsort.so"))

             ;; Ensure that all headers can be found.  `getenv' returns #f
             ;; when CPATH is unset, which `string-append' would reject, so
             ;; only prepend the existing value when there is one.
             (let ((eigen-headers (string-append (assoc-ref inputs "eigen")
                                                 "/include/eigen3"))
                   (current (getenv "CPATH")))
               (setenv "CPATH"
                       (if current
                           (string-append current ":" eigen-headers)
                           eigen-headers)))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "salmon-v" version))))
                    (file-name (string-append "rapmap-salmon-v" version "-checkout"))
                    (sha256
                     (base32
                      "1biplxf0csc7a8h1wf219b0vmjkvw6wk2zylhdklb577kgmihdms"))))
       ("jemalloc" ,jemalloc)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
12019
;; loompy: fetched from git because the PyPI tarball lacks the tests; the
;; check phase runs pytest on the "tests" directory.
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.17")
    ;; The tarball on Pypi does not include the tests.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/linnarsson-lab/loompy.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Put the current directory at the front of PYTHONPATH before
             ;; running the tests.
             (let ((search-path (string-append (getcwd) ":"
                                               (getenv "PYTHONPATH"))))
               (setenv "PYTHONPATH" search-path))
             (invoke "pytest" "tests")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
12060
;; We cannot use the latest commit because it requires Java 9.
;;
;; Forester is built with Ant from the "forester/java" subdirectory.  Bundled
;; jars, pre-built classes and bundled applications are removed from the
;; source, and build.xml is rewritten so that no jars are unpacked.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      (version (git-version "0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/cmzmasek/forester.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Delete bundled jars and pre-built classes
                    (delete-file-recursively "forester/java/resources")
                    (delete-file-recursively "forester/java/classes")
                    (for-each delete-file (find-files "forester/java/" "\\.jar$"))
                    ;; Delete bundled applications
                    (delete-file-recursively "forester_applications")
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'chdir
             (lambda _
               (chdir "forester/java")
               #t))
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               ;; Parse build.xml, transform the tree, and write the result
               ;; to a temporary file that then replaces the original.
               (let ((document
                      (with-input-from-file "build.xml"
                        (lambda _ (xml->sxml #:trim-whitespace? #t))))
                     (rules
                      `(;; Remove all unjar tags to avoid repacking classes.
                        (unjar . ,(lambda _ '()))
                        (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                        (*text* . ,(lambda (_ txt) txt)))))
                 (call-with-output-file "build.xml.new"
                   (lambda (port)
                     (sxml->xml (pre-post-order document rules) port))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               ;; With PdfExporter gone, make the call site throw instead.
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
12136
;; Forester 1.005, built from the "-sources.jar" published on Maven Central
;; under the biojava "thirdparty" namespace.  The two resource files that the
;; build needs are not part of that archive and are fetched separately.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source (origin
              (method url-fetch)
              ;; Use HTTPS rather than plain HTTP for the download.
              (uri (string-append "https://search.maven.org/remotecontent?"
                                  "filepath=org/biojava/thirdparty/forester/"
                                  version "/forester-" version "-sources.jar"))
              (file-name (string-append name "-" version ".jar"))
              (sha256
               (base32
                "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Write a filtered copy of "src/build.xml" to "build.xml" in
             ;; the current directory.
             (call-with-output-file "build.xml"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "src/build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `(;; Remove all unjar tags to avoid repacking classes.
                     (unjar . ,(lambda _ '()))
                     (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                     (*text* . ,(lambda (_ txt) txt))))
                  port)))
             ;; Place the separately fetched resources next to "build.xml"
             ;; and make the build file refer to these local copies.
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; Replace the PdfExporter call with a thrown IOException and
             ;; wrap the rest of the original statement in a Java block
             ;; comment ("/*" ... "*/").
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _ (delete-file-recursively "src/classes") #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
12231
;; Core module of BioJava, built from the "biojava-core" subdirectory of the
;; upstream Git checkout.  Other BioJava packages inherit from this one.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biojava/biojava")
                    (commit (string-append "biojava-" version))))
              ;; Same "<name>-<version>-checkout" name as before, via the
              ;; standard helper.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude (list "**/SearchIOTest.java"
                            "**/BlastXMLParserTest.java"
                            "**/GenbankCookbookTest.java"
                            "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources into the class output directory
         ;; before compilation.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, before the tests run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
12287
;; The "biojava-phylo" module of BioJava.  Source, version and license are
;; inherited from java-biojava-core; only the module-specific fields are
;; overridden here.
(define-public java-biojava-phylo
  (package (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources into the class output directory
         ;; before compilation.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, before the tests run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12323
;; The "biojava-alignment" module of BioJava.  Source, version and license
;; are inherited from java-biojava-core; only the module-specific fields are
;; overridden here.
(define-public java-biojava-alignment
  (package (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources into the class output directory
         ;; before compilation.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, before the tests run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12367
;; BioJava core at version 4.0.0.  Everything except the version and the
;; source checkout is inherited from java-biojava-core.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       ;; Expands to "<name>-<version>-checkout".
       (file-name (git-file-name name version))
       (sha256
        (base32 "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
12381
;; The "biojava-phylo" module at the version provided by
;; java-biojava-core-4.0, from which the source, version and license are
;; inherited.  It is built against java-forester-1.005.
(define-public java-biojava-phylo-4.0
  (package (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources into the class output directory
         ;; before compilation.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, before the tests run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12417
;; The "biojava-alignment" module at the version provided by
;; java-biojava-core-4.0, from which the source, version and license are
;; inherited.  It is built against the matching 4.0 phylo module and
;; java-forester-1.005.
(define-public java-biojava-alignment-4.0
  (package (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources into the class output directory
         ;; before compilation.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, before the tests run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12461
;; Drop-seq tools, built with Ant from the upstream zip archive.  Bundled jars
;; are removed by the source snippet; the jars of all "java-" inputs are
;; linked into "jar/lib" for the build and into the output's "lib" directory
;; afterwards.
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Keep the directories of every input whose label starts with
             ;; "java-", except "java-testng".
             (let ((dirs (filter-map (match-lambda
                                       ((name . dir)
                                        (if (and (string-prefix? "java-" name)
                                                 (not (string=? name "java-testng")))
                                            dir #f)))
                                     inputs)))
               (for-each (lambda (jar)
                           (symlink jar (string-append "jar/lib/" (basename jar))))
                         (append-map (lambda (dir) (find-files dir "\\.jar$"))
                                     dirs)))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/java/"))
                    ;; SHARE already ends with a slash; do not add another.
                    (lib (string-append share "lib/"))
                    (scripts (list "BAMTagHistogram"
                                   "BAMTagofTagCounts"
                                   "BaseDistributionAtReadPosition"
                                   "CollapseBarcodesInPlace"
                                   "CollapseTagWithContext"
                                   "ConvertToRefFlat"
                                   "CreateIntervalsFiles"
                                   "DetectBeadSynthesisErrors"
                                   "DigitalExpression"
                                   "Drop-seq_alignment.sh"
                                   "FilterBAM"
                                   "FilterBAMByTag"
                                   "GatherGeneGCLength"
                                   "GatherMolecularBarcodeDistributionByGene"
                                   "GatherReadQualityMetrics"
                                   "PolyATrimmer"
                                   "ReduceGTF"
                                   "SelectCellsByNumTranscripts"
                                   "SingleCellRnaSeqMetricsCollector"
                                   "TagBamWithReadSequenceExtended"
                                   "TagReadWithGeneExon"
                                   "TagReadWithInterval"
                                   "TrimStartingSequence"
                                   "ValidateReference")))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; In the installed scripts, replace a leading "java" with the
               ;; path found by `which' and point "jar_deploy_dir" at SHARE.
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    ;; SHARE already ends with a slash; do not add another.
                    (lib (string-append share "lib/")))
               ;; Re-create each "jar/lib" symlink in LIB, pointing at the
               ;; same target.
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib (basename jar))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12602
;; PiGx RNAseq pipeline, built from the upstream release tarball with the GNU
;; build system.  Some test scripts are removed from "Makefile.in" before the
;; build.
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                                  "releases/download/v" version
                                  "/pigx_rnaseq-" version ".tar.gz"))
              (sha256
               (base32
                "0z3hr120wk2vrlmlpz1vp3n9wy3rq4y2mnzh2vf08qgqn2xfdwcw"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               ;; Keep the trim_galore entry but drop whatever follows it on
               ;; the line; remove the other two entries.
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
12671
;; PiGx ChIPseq pipeline, built from the upstream release tarball with the
;; GNU build system.  The test suite is disabled.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.41")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                                  "releases/download/v" version
                                  "/pigx_chipseq-" version ".tar.gz"))
              (sha256
               (base32
                "0akbxdmsjsq5fzbwaap04hqjpsfgv1l6yrc2pwgbya1xgqvcq6vy"))))
    (build-system gnu-build-system)
    ;; parts of the tests rely on access to the network
    (arguments '(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
12741
;; PiGx BSseq pipeline, built from the upstream release tarball with the GNU
;; build system.  The time zone environment is set up before the tests run.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                                  "releases/download/v" version
                                  "/pigx_bsseq-" version ".tar.gz"))
              (sha256
               (base32
                "0l97wvkq4diq8lcarraj33bby1zzf0w804jwi8mlc5qddp8idwhy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-timezone
           ;; The readr package is picky about timezones.
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "TZ" "UTC+1")
             ;; Point TZDIR at the zoneinfo directory of the "tzdata" input.
             (setenv "TZDIR"
                     (string-append (assoc-ref inputs "tzdata")
                                    "/share/zoneinfo"))
             #t)))))
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
12800
;; PiGx scRNAseq pipeline, built from the upstream release tarball with the
;; GNU build system and its default phases.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "1.1.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                                  "releases/download/v" version
                                  "/pigx_scrnaseq-" version ".tar.gz"))
              (sha256
               (base32
                "0ga2jr4968qzwml6aycky4603q64lny3y7lzw6dmafch5pydl1qi"))))
    (build-system gnu-build-system)
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("fastqc" ,fastqc)
       ("flexbar" ,flexbar)
       ("java" ,icedtea-8)
       ("jellyfish" ,jellyfish)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake)
       ("star" ,star)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-seurat" ,r-seurat)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
12866
;; The PiGx package itself, built from the upstream release tarball; it takes
;; the four individual PiGx pipeline packages as inputs.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                                  "releases/download/v" version
                                  "/pigx-" version ".tar.gz"))
              (sha256
               (base32
                "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python)
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
12902
;; Genrich, built from the upstream Git tag "v<version>".  There is no
;; configure step, and the single "Genrich" binary is installed by hand.
(define-public genrich
  (package
    (name "genrich")
    (version "0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/jsh58/Genrich.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the "Genrich" executable into the output's "bin"
         ;; directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "Genrich" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/jsh58/Genrich")
    (synopsis "Detecting sites of genomic enrichment")
    (description "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
    (license license:expat)))
12935
;; Mantis, built with CMake from a pinned upstream Git commit; the package
;; version is derived from that commit with `git-version'.
(define-public mantis
  (let ((revision "1")
        (commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84"))
    (package
      (name "mantis")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/splatlab/mantis.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f))                  ; there are none
      (inputs
       `(("sdsl-lite" ,sdsl-lite)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      ;; uses __uint128_t and inline assembly
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      (license license:bsd-3))))
12970
;; The "diversitree" R package from CRAN, built with the R build system.
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-13")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "00vi4klywi35hd170ksjv3xja3hqqbkcidcnrrlpgv4179k0azix"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs `(("fftw" ,fftw) ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    ;; A full stop was missing after "BiSSE-ness", which ran two sentences
    ;; together.
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\". Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))
13001
;; Sjcount, built from a pinned Git commit against the "samtools-0.1"
;; package.  The configure phase is replaced by a makefile patch and the
;; three resulting programs are installed by hand.
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((revision "1")
        (commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full.git")
               (commit commit)))
         ;; Expands to "<name>-<version>-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32 "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, patch the makefile: use
           ;; the samtools include directory and also link with -lpthread.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((samtools (assoc-ref inputs "samtools")))
                 (substitute* "makefile"
                   (("-I \\$\\{SAMTOOLS_DIR\\}")
                    (string-append "-I" samtools "/include/samtools"))
                   (("-lz ") "-lz -lpthread ")))
               #t))
           ;; Install the three programs into the output's "bin" directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           '("j_count" "b_count" "sjcount")))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
13052
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "080w9066irkbhbyr4nmf19pzkdd2s4v31hpzlajgq2y0drr6zcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (list "CC=gcc"
             ;; Pick one extra make argument based on the system string,
             ;; which is computed on the host side and spliced in here.
             (let ((system ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" system)
                 "all")
                ((or (string-prefix? "armhf" system)
                     (string-prefix? "aarch64" system))
                 "arm_neon=1")
                ;; The fallback clause of `cond' must be spelled `else';
                ;; `_' is not a wildcard outside of pattern matching.
                (else "sse2only=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the binary and its manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (install-file "minimap2" bin)
               (mkdir-p man)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
13111
(define-public r-circus
  ;; Built from the upstream git tag "v" + VERSION.
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
13153
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place the gclib checkout in ./gclib, the directory that the
           ;; GCLDIR make flag points at.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; `let*' is required: VERSION is derived from the gclib REVISION
          ;; and COMMIT bound just above it, not from gffread's own.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib.git")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
13214
(define-public find-circ
  ;; The last release was in 2015. The license was clarified in 2017, so we
  ;; take the latest commit.
  (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d")
        (revision "1"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Copy every script into bin/ and wrap it so that PYTHONPATH is
           ;; prefixed with the value it has while this phase runs.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((output (assoc-ref outputs "out"))
                      (bindir (string-append output "/bin"))
                      (pythonpath (getenv "PYTHONPATH"))
                      (scripts '("cmp_bed.py"
                                 "find_circ.py"
                                 "maxlength.py"
                                 "merge_bed.py"
                                 "unmapped2anchors.py")))
                 (for-each
                  (lambda (script)
                    (install-file script bindir)
                    (wrap-program (string-append bindir "/" script)
                      (list "PYTHONPATH" ":" 'prefix (list pythonpath))))
                  scripts))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
13266
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.4.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "14kh1ji70xxhmri5q8sgcibsidhr6f221wxrcw8a5xvibj5da17j"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run pytest directly after removing the tests that cannot be run
         ;; in the build environment.
         (replace 'check
           (lambda* (#:key inputs #:allow-other-keys)
             ;; These tests require Internet access.
             (delete-file-recursively "scanpy/tests/notebooks")
             (delete-file "scanpy/tests/test_clustering.py")

             ;; TODO: I can't get the plotting tests to work, even with Xvfb.
             (for-each delete-file
                       '("scanpy/tests/test_plotting.py"
                         "scanpy/tests/test_preprocessing.py"
                         "scanpy/tests/test_read_10x.py"))

             ;; Put the source directory first on PYTHONPATH.
             (setenv "PYTHONPATH"
                     (string-append (getcwd) ":"
                                    (getenv "PYTHONPATH")))
             (invoke "pytest")
             #t)))))
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-h5py" ,python-h5py)
       ("python-igraph" ,python-igraph)
       ("python-joblib" ,python-joblib)
       ("python-louvain" ,python-louvain)
       ("python-legacy-api-wrap" ,python-legacy-api-wrap)
       ("python-matplotlib" ,python-matplotlib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-packaging" ,python-packaging)
       ("python-pandas" ,python-pandas)
       ("python-patsy" ,python-patsy)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-statsmodels" ,python-statsmodels)
       ("python-tables" ,python-tables)
       ("python-umap-learn" ,python-umap-learn)))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://github.com/theislab/scanpy")
    ;; A synopsis must not end with a period.
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
13329
(define-public python-bbknn
  ;; Python package fetched from PyPI; its test suite is currently disabled.
  (package
    (name "python-bbknn")
    (version "1.3.6")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "bbknn" version))
              (sha256
               (base32
                "1jbsh01f57zj4bhvjr3jh4532zznqd6nccmgrl3qi9gnhkf7c4y0"))))
    (build-system python-build-system)
    ;; TODO: Enable after migration to scikit-learn.
    (arguments
     `(#:tests? #f))
    (propagated-inputs
     `(("python-annoy" ,python-annoy)
       ("python-cython" ,python-cython)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-umap-learn" ,python-umap-learn)))
    (home-page "https://github.com/Teichlab/bbknn")
    (synopsis "Batch balanced KNN")
    (description
     "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow. It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches. BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups. This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
    (license license:expat)))
13363
(define-public gffcompare
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the gclib checkout to ../gclib, next to the source
           ;; directory, before building.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           ;; Install the single binary by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; `let*' is required here: with a plain `let', the COMMIT and
          ;; REVISION used to compute VERSION would be gffcompare's outer
          ;; bindings rather than the gclib ones bound just above it.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
13426
(define-public intervaltree
  ;; Built from a pinned commit; the version string is derived from it.
  (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5")
        (revision "1"))
    (package
      (name "intervaltree")
      (version (git-version "0.0.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/intervaltree/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ; No tests.
         ;; Install into the output directory, with an empty DESTDIR.
         #:make-flags
         (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
               "DESTDIR=\"\"")
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase.
           (delete 'configure))))
      (home-page "https://github.com/ekg/intervaltree")
      (synopsis "Minimal C++ interval tree implementation")
      (description
       "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval. This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
      (license license:expat))))
13456
(define-public python-intervaltree
  ;; Python package fetched from PyPI.
  (package
    (name "python-intervaltree")
    (version "2.1.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "intervaltree" version))
              (sha256
               (base32
                "02w191m9zxkcjqr1kv2slxvhymwhj3jnsyy3a28b837pi15q19dc"))))
    (build-system python-build-system)
    (arguments
     ;; FIXME: error when collecting tests
     '(#:tests? #f))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
13484
(define-public python-pypairix
  ;; Python package fetched from PyPI; zlib is its only regular input.
  (package
    (name "python-pypairix")
    (version "0.3.6")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pypairix" version))
              (sha256
               (base32
                "0zs92b74s5v4xy2h16s15f3z6l4nnbw8x8zyif7xx5xpafjn0xss"))))
    (build-system python-build-system)
    (arguments
     ;; FIXME: the tests fail because test.support cannot be loaded:
     ;; ImportError: cannot import name 'support'
     '(#:tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
13508
(define-public python-pyfaidx
  ;; Python package fetched from PyPI.
  (package
    (name "python-pyfaidx")
    (version "0.5.7")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyfaidx" version))
              (sha256
               (base32
                "02jvdx3ksy6w5gd29i1d0g0zsabbz7c14qg482ff7pza6sdl0b2i"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
13529
(define-public python-cooler
  ;; Python package fetched from PyPI.
  (package
    (name "python-cooler")
    (version "0.7.11")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "cooler" version))
              (sha256
               (base32
                "08k5nxnxa6qsbk15z5z0q01n28042k87wi4905hh95rzqib15mhx"))))
    (build-system python-build-system)
    ;; Run-time dependencies, propagated to users of this package.
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)))
    ;; Build-time-only dependencies.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-numpydoc" ,python-numpydoc)
       ("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
13566
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn every exact "==" version pin in setup.py into a ">="
         ;; lower bound.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
13619
(define-public python-pygenometracks
  ;; Python package fetched from PyPI under its upstream name
  ;; "pyGenomeTracks".
  (package
    (name "python-pygenometracks")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyGenomeTracks" version))
              (sha256
               (base32
                "1fws6bqsyy9kj3qiabhkqx4wd4i775gsxnhszqd3zg7w67sc1ic5"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-configparser" ,python-configparser)
       ("python-future" ,python-future)
       ("python-hicexplorer" ,python-hicexplorer)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
13650
(define-public python-hic2cool
  ;; Python package fetched from PyPI.
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "hic2cool" version))
              (sha256
               (base32
                "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
13674
(define-public r-pore
  ;; R package fetched from the SourceForge mirror; the upstream name is
  ;; "poRe", as recorded in the `upstream-name' property.
  (package
    (name "r-pore")
    (version "0.24")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/rpore/" version
                                  "/poRe_" version ".tar.gz"))
              (sha256
               (base32
                "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    (properties `((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bit64" ,r-bit64)
       ("r-data-table" ,r-data-table)
       ("r-rhdf5" ,r-rhdf5)
       ("r-shiny" ,r-shiny)
       ("r-svdialogs" ,r-svdialogs)))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; This is free software but the license variant is unclear:
    ;; <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
13703
(define-public r-xbioc
  ;; Built from a pinned git commit; the version string is derived from it.
  (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.16" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-assertthat" ,r-assertthat)
         ("r-biobase" ,r-biobase)
         ("r-biocmanager" ,r-biocmanager)
         ("r-digest" ,r-digest)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-reshape2" ,r-reshape2)
         ("r-stringr" ,r-stringr)))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description
       "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
13736
(define-public r-cssam
  ;; Built from a pinned git commit; the version string is derived from it.
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-formula" ,r-formula)
         ("r-ggplot2" ,r-ggplot2)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description
       "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Any version
      (license license:lgpl2.1+))))
13767
(define-public r-bseqsc
  ;; Built from a pinned git commit; the version string is derived from it.
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-abind" ,r-abind)
         ("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-cssam" ,r-cssam)
         ("r-dplyr" ,r-dplyr)
         ("r-e1071" ,r-e1071)
         ("r-edger" ,r-edger)
         ("r-ggplot2" ,r-ggplot2)
         ("r-nmf" ,r-nmf)
         ("r-openxlsx" ,r-openxlsx)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-preprocesscore" ,r-preprocesscore)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)
         ("r-stringr" ,r-stringr)
         ("r-xbioc" ,r-xbioc)))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
      (description
       "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
13812
(define-public porechop
  ;; The recommended way to install is to clone the git repository
  ;; https://github.com/rrwick/Porechop#installation
  (let ((commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861")
        (revision "1"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis "Finding, trimming or splitting adapters in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
13842
(define-public poretools
  ;; The latest release was in 2016 and the latest commit is from 2017
  ;; the recommended way to install is to clone the git repository
  ;; https://poretools.readthedocs.io/en/latest/content/installation.html
  (let ((commit "e426b1f09e86ac259a00c261c79df91510777407")
        (revision "1"))
    (package
      (name "poretools")
      (version (git-version "0.6.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/arq5x/poretools.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
      (build-system python-build-system)
      (arguments
       ;; requires python >=2.7, <3.0, and the same for python dependencies
       `(#:python ,python-2))
      (inputs
       `(("hdf5" ,hdf5)))
      ;; The labels say "python-", but the packages are the python2 variants.
      (propagated-inputs
       `(("python-dateutil" ,python2-dateutil)
         ("python-h5py" ,python2-h5py)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-seaborn" ,python2-seaborn)))
      (home-page "https://poretools.readthedocs.io")
      (synopsis "Toolkit for working with nanopore sequencing data")
      (description
       "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
This @code{poretools} package is a flexible toolkit for exploring datasets
generated by nanopore sequencing devices for the purposes of quality control and
downstream analysis. Poretools operates directly on the native FAST5, a variant
of the Hierarchical Data Format (HDF5) standard.")
      (license license:expat))))
13881
(define-public r-absfiltergsea
  (package
    (name "r-absfiltergsea")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       ;; The CRAN name is mixed-case, unlike the Guix package name.
       (uri (cran-uri "AbsFilterGSEA" version))
       (sha256
        (base32 "15srxkxsvn38kd5frdrwfdf0ad8gskrd0h01wmdf9hglq8fjrp7w"))))
    ;; Record the upstream spelling of the package name.
    (properties `((upstream-name . "AbsFilterGSEA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-deseq" ,r-deseq)
       ("r-limma" ,r-limma)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)))
    (home-page "https://cran.r-project.org/web/packages/AbsFilterGSEA/")
    (synopsis "Improved false positive control of gene-permuting with absolute filtering")
    (description
     "This package provides a function that performs gene-permuting of a gene-set
enrichment analysis (GSEA) calculation with or without the absolute filtering.
Without filtering, users can perform (original) two-tailed or one-tailed
absolute GSEA.")
    (license license:gpl2)))
13908
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM.git")
             ;; Upstream tags look like "JAMMv1.0.7.6".
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; The package consists of scripts only: nothing to configure or
         ;; build, so only a custom 'install phase is needed.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (libexec (string-append out "/libexec/jamm"))
                    ;; The two user-facing shell scripts.
                    (entry-points '("JAMM.sh" "SignalGenerator.sh"))
                    ;; Return the "bin" directory of the input named LABEL.
                    (input-bin
                     (lambda (label)
                       (string-append (assoc-ref inputs label) "/bin"))))
               ;; Point the shell scripts at the directory where the helper
               ;; scripts are installed below.
               (substitute* entry-points
                 (("^sPath=.*")
                  (string-append "sPath=\"" libexec "\"\n")))
               (for-each (lambda (helper)
                           (install-file helper libexec))
                         '("bincalculator.r"
                           "peakfinder.r"
                           "peakhelper.r"
                           "signalmaker.r"
                           "xcorr.r"
                           "xcorrhelper.r"
                           ;; Perl scripts
                           "peakfilter.pl"
                           "readshifter.pl"))

               ;; Install each entry point as an executable and wrap it so
               ;; that the tools and libraries it uses are found at run time.
               (for-each
                (lambda (script)
                  (chmod script #o555)
                  (install-file script bin)
                  (wrap-program (string-append bin "/" script)
                    `("PATH" ":" prefix
                      ,(map input-bin
                            '("coreutils" "gawk" "perl" "r-minimal")))
                    `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")))
                    `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
                entry-points))
             #t)))))
    (inputs
     `(("bash" ,bash)
       ("coreutils" ,coreutils)
       ("gawk" ,gawk)
       ("perl" ,perl)
       ("r-minimal" ,r-minimal)
       ;;("r-parallel" ,r-parallel)
       ("r-signal" ,r-signal)
       ("r-mclust" ,r-mclust)))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
13982
(define-public ngless
  (package
    (name "ngless")
    (version "1.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/ngless/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
    (build-system haskell-build-system)
    (arguments
     `(#:haddock? #f ; The haddock phase fails with: NGLess/CmdArgs.hs:20:1:
                     ; error: parse error on input import
                     ; import Options.Applicative
       #:phases
       (modify-phases %standard-phases
         ;; Write the versions of the packaged external tools into the
         ;; Makefile, then let make generate Versions.hs from it.
         (add-after 'unpack 'create-Versions.hs
           (lambda _
             (substitute* "Makefile"
               (("BWA_VERSION = .*")
                (string-append "BWA_VERSION = "
                               ,(package-version bwa) "\n"))
               (("SAM_VERSION = .*")
                (string-append "SAM_VERSION = "
                               ,(package-version samtools) "\n"))
               (("PRODIGAL_VERSION = .*")
                (string-append "PRODIGAL_VERSION = "
                               ,(package-version prodigal) "\n"))
               (("MINIMAP2_VERSION = .*")
                (string-append "MINIMAP2_VERSION = "
                               ,(package-version minimap2) "\n")))
             (invoke "make" "NGLess/Dependencies/Versions.hs")
             #t))
         ;; Run hpack once Versions.hs exists.
         (add-after 'create-Versions.hs 'create-cabal-file
           (lambda _ (invoke "hpack") #t))
         ;; These tools are expected to be installed alongside ngless, under
         ;; the name "ngless-VERSION-TOOL".
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each
                (lambda (tool)
                  ;; Each input label is also the name of its executable.
                  (symlink (string-append (assoc-ref inputs tool)
                                          "/bin/" tool)
                           (string-append bin "ngless-" ,version "-" tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))
               #t))))))
    (inputs
     `(("prodigal" ,prodigal)
       ("bwa" ,bwa)
       ("samtools" ,samtools)
       ("minimap2" ,minimap2)
       ("ghc-aeson" ,ghc-aeson)
       ("ghc-ansi-terminal" ,ghc-ansi-terminal)
       ("ghc-async" ,ghc-async)
       ("ghc-atomic-write" ,ghc-atomic-write)
       ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
       ("ghc-conduit" ,ghc-conduit)
       ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
       ("ghc-conduit-extra" ,ghc-conduit-extra)
       ("ghc-configurator" ,ghc-configurator)
       ("ghc-convertible" ,ghc-convertible)
       ("ghc-data-default" ,ghc-data-default)
       ("ghc-diagrams-core" ,ghc-diagrams-core)
       ("ghc-diagrams-lib" ,ghc-diagrams-lib)
       ("ghc-diagrams-svg" ,ghc-diagrams-svg)
       ("ghc-double-conversion" ,ghc-double-conversion)
       ("ghc-edit-distance" ,ghc-edit-distance)
       ("ghc-either" ,ghc-either)
       ("ghc-errors" ,ghc-errors)
       ("ghc-extra" ,ghc-extra)
       ("ghc-filemanip" ,ghc-filemanip)
       ("ghc-file-embed" ,ghc-file-embed)
       ("ghc-gitrev" ,ghc-gitrev)
       ("ghc-hashtables" ,ghc-hashtables)
       ("ghc-http-conduit" ,ghc-http-conduit)
       ("ghc-inline-c" ,ghc-inline-c)
       ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
       ("ghc-intervalmap" ,ghc-intervalmap)
       ("ghc-missingh" ,ghc-missingh)
       ("ghc-optparse-applicative" ,ghc-optparse-applicative)
       ("ghc-regex" ,ghc-regex)
       ("ghc-safe" ,ghc-safe)
       ("ghc-safeio" ,ghc-safeio)
       ("ghc-strict" ,ghc-strict)
       ("ghc-tar" ,ghc-tar)
       ("ghc-tar-conduit" ,ghc-tar-conduit)
       ("ghc-unliftio" ,ghc-unliftio)
       ("ghc-unliftio-core" ,ghc-unliftio-core)
       ("ghc-vector" ,ghc-vector)
       ("ghc-yaml" ,ghc-yaml)
       ("ghc-zlib" ,ghc-zlib)))
    (propagated-inputs
     `(("r-r6" ,r-r6)
       ("r-hdf5r" ,r-hdf5r)
       ("r-iterators" ,r-iterators)
       ("r-itertools" ,r-itertools)
       ("r-matrix" ,r-matrix)))
    (native-inputs
     `(("ghc-hpack" ,ghc-hpack)
       ("ghc-quickcheck" ,ghc-quickcheck)
       ("ghc-test-framework" ,ghc-test-framework)
       ("ghc-test-framework-hunit" ,ghc-test-framework-hunit)
       ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
       ("ghc-test-framework-th" ,ghc-test-framework-th)))
    (home-page "https://gitlab.com/ngless/ngless")
    (synopsis "DSL for processing next-generation sequencing data")
    (description "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
14103
(define-public filtlong
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; <https://github.com/rrwick/Filtlong#installation>, and the latest
  ;; release is more than nine months old.
  (let ((revision "1")
        (commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; The 'install phase is replaced with manual installation of the
           ;; binary and of the two helper scripts.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin"))
                      (scriptdir (string-append prefix
                                                "/share/filtlong/scripts")))
                 (install-file "bin/filtlong" bindir)
                 (for-each (lambda (script)
                             (install-file script scriptdir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))
               #t))
           ;; Make histogram.py use the PYTHONPATH of the build environment.
           (add-after 'install 'wrap-program
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((script (string-append
                              (assoc-ref outputs "out")
                              "/share/filtlong/scripts/histogram.py"))
                     (pythonpath (getenv "PYTHONPATH")))
                 (wrap-program script
                   `("PYTHONPATH" ":" prefix (,pythonpath))))
               #t))
           ;; Despite its name this phase patches a helper script, not tests:
           ;; it replaces "awk" with the file name returned by (which "gawk").
           (add-before 'check 'patch-tests
             (lambda _
               (substitute* "scripts/read_info_histograms.sh"
                 (("awk") (which "gawk")))
               #t)))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0))))) ;histogram.py
14163
(define-public nanopolish
  ;; The recommended way to install is to clone the git repository
  ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  ;; Also, the differences between release and current version seem to be
  ;; significant.
  (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.11.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish.git")
               (commit commit)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the "htslib" directory from the checkout; htslib is
             ;; provided as a package input instead.
             (delete-file-recursively "htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Add the "eigen3" include directory of the eigen input to CPATH
           ;; so that the compiler finds the Eigen headers.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "CPATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3:"
                                      (or (getenv "CPATH") "")))
               #t))
           (delete 'configure)
           ;; Install the binary and every file found under "scripts".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (string-append out "/share/nanopolish/scripts")))

                 (install-file "nanopolish" bin)
                 (for-each (lambda (file) (install-file file scripts))
                           (find-files "scripts" ".*"))
                 #t)))
           ;; Wrap the installed Python and Perl scripts so that they find
           ;; their libraries at run time.  The scripts are looked up below
           ;; the package output (not below the root directory), and the
           ;; search path values are taken from the build environment.
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (scripts (string-append out "/share/nanopolish/scripts"))
                      ;; Wrap every file in SCRIPTS matching PATTERN so that
                      ;; VARIABLE is prefixed with its build-time value.  Do
                      ;; nothing when VARIABLE is unset in the build
                      ;; environment, since there is no value to record.
                      (wrap-all
                       (lambda (variable pattern)
                         (let ((value (getenv variable)))
                           (when value
                             (for-each
                              (lambda (file)
                                (wrap-program file
                                  `(,variable ":" prefix (,value))))
                              (find-files scripts pattern)))))))
                 (wrap-all "PYTHONPATH" "\\.py$")
                 (wrap-all "PERL5LIB" "\\.pl$")
                 #t))))))
      (inputs
       `(("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("python" ,python-wrapper)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))
14243
(define-public cnvkit
  (package
    (name "cnvkit")
    (version "0.9.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/etal/cnvkit.git")
             ;; Release tags carry a "v" prefix.
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
    (build-system python-build-system)
    (propagated-inputs
     `(;; Python packages
       ("python-biopython" ,python-biopython)
       ("python-future" ,python-future)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-reportlab" ,python-reportlab)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-scipy" ,python-scipy)
       ;; R packages
       ("r-dnacopy" ,r-dnacopy)))
    (home-page "https://cnvkit.readthedocs.org/")
    (synopsis "Copy number variant detection from targeted DNA sequencing")
    (description
     "CNVkit is a Python library and command-line software toolkit to infer
and visualize copy number from high-throughput DNA sequencing data. It is
designed for use with hybrid capture, including both whole-exome and custom
target panels, and short-read sequencing platforms such as Illumina and Ion
Torrent.")
    (license license:asl2.0)))
14279
(define-public python-pyfit-sne
  (package
    (name "python-pyfit-sne")
    (version "1.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/pyFIt-SNE.git")
             ;; Tags are the bare version number, without any prefix.
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "13wh3qkzs56azmmgnxib6xfr29g7xh09sxylzjpni5j0pp0rc5qw"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("fftw" ,fftw)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/KlugerLab/pyFIt-SNE")
    (synopsis "FFT-accelerated Interpolation-based t-SNE")
    (description
     "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This package
is a Cython wrapper for FIt-SNE.")
    (license license:bsd-4)))
14309
(define-public bbmap
  (package
    (name "bbmap")
    (version "35.82")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
              (sha256
               (base32
                "1q4rfhxcb6z3gm8zg2davjz98w22lkf4hm9ikxz9kdl93pil3wkd"))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "dist"
       #:tests? #f                      ; there are none
       ;; Pass the location of mpi.jar from the java-openmpi input to ant.
       #:make-flags
       (list (string-append "-Dmpijar="
                            (assoc-ref %build-inputs "java-openmpi")
                            "/lib/mpi.jar"))
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils))
       #:phases
       (modify-phases %standard-phases
         ;; The JNI library has its own makefile in the "jni" directory.
         (add-after 'build 'build-jni-library
           (lambda _
             (with-directory-excursion "jni"
               (invoke "make" "-f" "makefile.linux"))))
         ;; There is no install target
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-scripts-and-documentation
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Use the file name returned by (which "awk") in calcmem.sh.
             (substitute* "calcmem.sh"
               (("\\| awk ") (string-append "| " (which "awk") " ")))
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/bbmap"))
                    (jnidir (string-append prefix "/lib/jni"))
                    (shell-scripts (find-files "." "\\.sh$")))
               ;; Make every shell script refer to the installed
               ;; documentation, jar, JNI directory and java executable.
               (substitute* shell-scripts
                 (("\\$DIR\"\"docs") docdir)
                 (("^CP=.*")
                  (string-append "CP=" prefix "/share/java/BBTools.jar\n"))
                 (("^NATIVELIBDIR.*")
                  (string-append "NATIVELIBDIR=" jnidir "\n"))
                 (("CMD=\"java")
                  (string-append "CMD=\"" (which "java"))))
               (for-each (lambda (script) (install-file script bindir))
                         shell-scripts)

               ;; Install JNI library
               (install-file "jni/libbbtoolsjni.so" jnidir)

               ;; Install documentation
               (install-file "docs/readme.txt" docdir)
               (copy-recursively "docs/guides" docdir))
             #t)))
       #:jdk ,openjdk11))
    (inputs
     `(("gawk" ,gawk)
       ("java-eclipse-jdt-core" ,java-eclipse-jdt-core)
       ("java-eclipse-jdt-compiler-apt" ,java-eclipse-jdt-compiler-apt)
       ("java-openmpi" ,java-openmpi)))
    (home-page "http://sourceforge.net/projects/bbmap/")
    (synopsis "Aligner and other tools for short sequencing reads")
    (description
     "This package provides bioinformatic tools to align, deduplicate,
reformat, filter and normalize DNA and RNA-seq data. It includes the
following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
a kmer-based error-correction and normalization tool; Dedupe, a tool to
simplify assemblies by removing duplicate or contained subsequences that share
a target percent identity; Reformat, to convert reads between
fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
to an artifact/contaminant file.")
    (license license:bsd-3)))
14385
(define-public velvet
  (package
    (name "velvet")
    (version "1.2.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
                                  "velvet_" version ".tgz"))
              (sha256
               (base32
                "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
              ;; Delete bundled libraries, as well as the shipped Manual.pdf.
              ;; NOTE(review): the 'install phase below installs Manual.pdf
              ;; again, so it is presumably regenerated during the build
              ;; (hence the texlive native input) -- confirm.
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file "Manual.pdf")
                  (delete-file-recursively "third-party")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("OPENMP=t")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The source includes zlib.h from the deleted "third-party"
         ;; directory; include the header by its plain name instead.
         (add-after 'unpack 'fix-zlib-include
           (lambda _
             (substitute* "src/binarySequences.c"
               (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))
             #t))
         ;; Install the two programs and the two manuals by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/velvet")))
               ;; install-file creates the target directory when needed.
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("velveth" "velvetg"))
               (for-each (lambda (manual)
                           (install-file manual docdir))
                         '("Manual.pdf" "Columbus_manual.pdf"))
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-latex-graphics
                                        texlive-latex-hyperref)))))
    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
    (synopsis "Nucleic acid sequence assembler for very short reads")
    (description
     "Velvet is a de novo genomic assembler specially designed for short read
sequencing technologies, such as Solexa or 454. Velvet currently takes in
short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
    (license license:gpl2+)))
14443
(define-public python-velocyto
  (package
    (name "python-velocyto")
    (version "0.17.17")
    (source
     (origin
       (method url-fetch)
       ;; The PyPI project is called "velocyto", without the "python-" prefix.
       (uri (pypi-uri "velocyto" version))
       (sha256
        (base32
         "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
    (build-system python-build-system)
    ;; All dependencies are propagated.
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-cython" ,python-cython)
       ("python-h5py" ,python-h5py)
       ("python-loompy" ,python-loompy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/velocyto-team/velocyto.py")
    (synopsis "RNA velocity analysis for single cell RNA-seq data")
    (description
     "Velocyto is a library for the analysis of RNA velocity. Velocyto
includes a command line tool and an analysis pipeline.")
    (license license:bsd-2)))
14474
(define-public arriba
  (package
    (name "arriba")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/suhrig/arriba/releases/"
                           "download/v" version "/arriba_v" version ".tar.gz"))
       (sha256
        (base32
         "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script; instead patch the Makefile and the
         ;; runner script in place.
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Build against the htslib input: use its headers and link
             ;; with its shared library rather than a libhts.a located
             ;; under $(HTSLIB).
             (let ((htslib (assoc-ref inputs "htslib")))
               (substitute* "Makefile"
                 (("-I\\$\\(HTSLIB\\)/htslib")
                  (string-append "-I" htslib "/include/htslib"))
                 ((" \\$\\(HTSLIB\\)/libhts.a")
                  (string-append " " htslib "/lib/libhts.so"))))
             ;; Refer to STAR and samtools by the file names that `which'
             ;; returns in the build environment.
             (substitute* "run_arriba.sh"
               (("^STAR ") (string-append (which "STAR") " "))
               (("samtools --version-only")
                (string-append (which "samtools") " --version-only"))
               (("samtools index")
                (string-append (which "samtools") " index"))
               (("samtools sort")
                (string-append (which "samtools") " sort")))
             #t))
         ;; Install the binary and both scripts by hand, and wrap the R
         ;; script with the R_LIBS_SITE value of the build environment.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "arriba" bin)
               (install-file "run_arriba.sh" bin)
               (install-file "draw_fusions.R" bin)
               (wrap-program (string-append bin "/draw_fusions.R")
                 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("r-minimal" ,r-minimal)
       ("r-circlize" ,r-circlize)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("samtools" ,samtools)
       ("star" ,star)
       ("zlib" ,zlib)))
    (home-page "https://github.com/suhrig/arriba")
    ;; No trailing whitespace in the synopsis.
    (synopsis "Gene fusion detection from RNA-Seq data")
    (description
     "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
    ;; All code is under the Expat license with the exception of
    ;; "draw_fusions.R", which is under GPLv3.
    (license (list license:expat license:gpl3))))
14540
(define-public adapterremoval
  (package
    (name "adapterremoval")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/MikkelSchubert/adapterremoval.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no configure script; the installation prefix is passed
       ;; to make directly.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags (list "COLOR_BUILD=no"
                          (string-append "PREFIX="
                                         (assoc-ref %outputs "out")))
       #:test-target "test"))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://adapterremoval.readthedocs.io/")
    (synopsis "Rapid sequence adapter trimming, identification, and read merging")
    (description
     "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
    (license license:gpl3+)))
14577
(define-public pplacer
  ;; COMMIT is only used in the 'fix-makefile phase, where it becomes part of
  ;; the string that replaces the output of "git describe".
  (let ((commit "807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/matsen/pplacer.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Patch myocamlbuild.ml: add the "-unsafe-string" compile flag
           ;; and insert small string helpers (split, split_nl,
           ;; before_space) that are then used to post-process the output of
           ;; "ocamlfind list".
           (add-after 'unpack 'fix-build-with-latest-ocaml
             (lambda _
               (substitute* "myocamlbuild.ml"
                 (("dep \\[\"c_pam\"\\]" m)
                  (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
                                 m))
                 (("let run_and_read" m)
                  (string-append "
let split s ch =
let x = ref [] in
let rec go s =
let pos = String.index s ch in
x := (String.before s pos)::!x;
go (String.after s (pos + 1))
in
try go s
with Not_found -> !x
let split_nl s = split s '\\n'
let before_space s =
try String.before s (String.index s ' ')
with Not_found -> s

" m))
                 (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m)
                  (string-append "List.map before_space (split_nl & " m ")"))
                 ((" blank_sep_strings &") "")
                 ((" Lexing.from_string &") ""))
               #t))
           ;; Overwrite the C sources and headers in "cdd_src" with those
           ;; from the "lib-src" directory of the cddlib source tarball.
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((tarball (assoc-ref inputs "cddlib-src"))
                     (unpack-dir "cddlib_guix"))
                 (mkdir unpack-dir)
                 (with-directory-excursion unpack-dir
                   (invoke "tar" "xvf" tarball))
                 ;; scandir lists "." and ".." first, so the element at
                 ;; index 2 is the first real entry of the directory.
                 (let* ((top-level (list-ref (scandir unpack-dir) 2))
                        (lib-src (string-append unpack-dir "/"
                                                top-level "/lib-src")))
                   (for-each make-file-writable (find-files "cdd_src" ".*"))
                   (for-each
                    (lambda (source-file)
                      (copy-file source-file
                                 (string-append "cdd_src/"
                                                (basename source-file))))
                    (find-files lib-src ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; Install by copying the contents of "bin" to the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" target))
               #t)))
         #:ocaml ,ocaml-4.07
         #:findlib ,ocaml4.07-findlib))
      (inputs
       `(("zlib" ,zlib "static")
         ("gsl" ,gsl)
         ("ocaml-ounit" ,(package-with-ocaml4.07 ocaml-ounit))
         ("ocaml-batteries" ,(package-with-ocaml4.07 ocaml-batteries))
         ("ocaml-camlzip" ,(package-with-ocaml4.07 camlzip))
         ("ocaml-csv" ,(package-with-ocaml4.07 ocaml-csv))
         ("ocaml-sqlite3" ,ocaml4.07-sqlite3)
         ("ocaml-xmlm" ,(package-with-ocaml4.07 ocaml-xmlm))
         ("ocaml-mcl" ,(package-with-ocaml4.07 ocaml-mcl))
         ("ocaml-gsl" ,ocaml4.07-gsl-1)))
      (native-inputs
       `(("cddlib-src" ,(package-source cddlib))
         ("ocamlbuild" ,(package-with-ocaml4.07 ocamlbuild))
         ("pkg-config" ,pkg-config)))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "https://matsen.fhcrc.org/pplacer/")
      (license license:gpl3))))
14693
14694 ;; This package is installed alongside 'pplacer'. It is a separate package so
14695 ;; that it can use the python-build-system for the scripts that are
14696 ;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; All remaining phases run from the "scripts" sub-directory of the
         ;; unpacked source.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts") #t))
         (replace 'check
           (lambda _
             (invoke "python" "-m" "unittest" "discover" "-v")
             #t))
         ;; Prefix PATH of the installed scripts with the "bin" directories
         ;; of the hmmer (and, for refpkg_align.py, infernal) inputs.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                    (hmmer-path (string-append
                                 (assoc-ref inputs "hmmer") "/bin"))
                    (align-path (string-append
                                 hmmer-path ":"
                                 (assoc-ref inputs "infernal") "/bin")))
               (wrap-program (string-append bin "/refpkg_align.py")
                 `("PATH" ":" prefix (,align-path)))
               (wrap-program (string-append bin "/hrefpkg_query.py")
                 `("PATH" ":" prefix (,hmmer-path))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
14732
;; CheckM, fetched from PyPI and built with Python 2.
(define-public python2-checkm-genome
  (package
    (name "python2-checkm-genome")
    (version "1.0.13")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "checkm-genome" version))
              (sha256
               (base32
                "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; Some tests are interactive, so the test suite is not run.
       #:tests? #f))
    (propagated-inputs
     `(("python-dendropy" ,python2-dendropy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://pypi.org/project/Checkm/")
    (synopsis "Assess the quality of putative genome bins")
    (description
     "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
    (license license:gpl3+)))
14768
;; UMI-tools, fetched from PyPI (the PyPI project name is "umi_tools").
(define-public umi-tools
  (package
    (name "umi-tools")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "umi_tools" version))
       (sha256
        (base32
         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
    (build-system python-build-system)
    (inputs
     `(("python-pandas" ,python-pandas)
       ("python-future" ,python-future)
       ("python-scipy" ,python-scipy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-regex" ,python-regex)
       ("python-pysam" ,python-pysam)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/CGATOxford/UMI-tools")
    ;; UMI stands for "Unique Molecular Identifier", as spelled out in the
    ;; description below.
    (synopsis "Tools for analyzing unique molecular identifiers")
    (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
    (license license:expat)))
14801
;; ataqv is built straight from its Makefile; there is no configure step.
(define-public ataqv
  (package
    (name "ataqv")
    (version "1.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ParkerLab/ataqv.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       ;; Tell the Makefile where to install and where to find the boost and
       ;; htslib inputs.
       #:make-flags
       (let ((input-directory (lambda (label)
                                (assoc-ref %build-inputs label))))
         (list (string-append "prefix=" (assoc-ref %outputs "out"))
               (string-append "BOOST_ROOT=" (input-directory "boost"))
               (string-append "HTSLIB_ROOT=" (input-directory "htslib"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("lcov" ,lcov)))
    (home-page "https://github.com/ParkerLab/ataqv")
    (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
    (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
    (license license:gpl3+)))
14842
;; PSIplot, fetched from the upstream git tag "v<version>".
(define-public r-psiplot
  (package
    (name "r-psiplot")
    (version "2.3.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/kcha/psiplot.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-mass" ,r-mass)
       ("r-dplyr" ,r-dplyr)
       ("r-tidyr" ,r-tidyr)
       ("r-purrr" ,r-purrr)
       ("r-readr" ,r-readr)
       ("r-magrittr" ,r-magrittr)
       ("r-ggplot2" ,r-ggplot2)))
    (home-page "https://github.com/kcha/psiplot")
    (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
    (description
     "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
    (license license:expat)))
14873
;; ont_fast5_api, fetched from the upstream git tag "release_<version>".
(define-public python-ont-fast5-api
  (package
    (name "python-ont-fast5-api")
    (version "1.4.4")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/nanoporetech/ont_fast5_api.git")
                    (commit (string-append "release_" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "03cbq4zbbwhll8ml2m9k8sa31mirsvcbjkrq1yna0kkzz9fad5fm"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)
       ("python-h5py" ,python-h5py)
       ("python-progressbar33" ,python-progressbar33)))
    (home-page "https://github.com/nanoporetech/ont_fast5_api")
    (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
    (description
     "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
    (license license:mpl2.0)))
14902
;; tbsp is packaged from a pinned upstream commit; the version string is
;; derived from base version "1.0.0", the revision and the commit.
(define-public tbsp
  (let ((revision "1")
        (commit "ec8fff4410cfb13a677dbbb95cbbc60217e64907"))
    (package
      (name "tbsp")
      (version (git-version "1.0.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/phoenixding/tbsp.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "025ym14x8gbd6hb55lsinqj6f5qzw36i10klgs7ldzxxd7s39ki1"))))
      (build-system python-build-system)
      ;; No tests are included.
      (arguments '(#:tests? #f))
      (inputs
       `(("python-matplotlib" ,python-matplotlib)
         ("python-networkx" ,python-networkx)
         ("python-numpy" ,python-numpy)
         ("python-pybigwig" ,python-pybigwig)
         ("python-biopython" ,python-biopython)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)))
      (home-page "https://github.com/phoenixding/tbsp/")
      (synopsis "SNP-based trajectory inference")
      (description
       "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
      (license license:expat))))
14940
;; tabixpp is built against the htslib input; the "htslib" directory shipped
;; in the checkout is deleted by the source snippet.
(define-public tabixpp
  (package
    (name "tabixpp")
    (version "1.1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/tabixpp")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  (delete-file-recursively "htslib") #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         ;; The build phase needs overriding the location of htslib.
         (replace 'build
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-ref (assoc-ref inputs "htslib")))
               (invoke "make"
                       (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
                       (string-append "INCLUDES= -I" htslib-ref "/include/htslib")
                       "HTS_HEADERS=" ; No need to check for headers here.
                       ;; The library search path must name htslib's "lib"
                       ;; directory, not its header directory.
                       (string-append "LIBPATH=-L. -L" htslib-ref "/lib")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "tabix++" bin))
             #t)))))
    (home-page "https://github.com/ekg/tabixpp")
    (synopsis "C++ wrapper around tabix project")
    (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
    (license license:expat)))
14985
;; smithwaterman is packaged from a pinned upstream commit.
(define-public smithwaterman
  (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
    (package
      (name "smithwaterman")
      (version (git-version "0.0.0" "2" commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/smithwaterman/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:make-flags '("libsw.a" "all")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Install the executable under bin/ and the static library
           ;; under lib/.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((prefix (assoc-ref outputs "out")))
                 (install-file "smithwaterman"
                               (string-append prefix "/bin"))
                 (install-file "libsw.a"
                               (string-append prefix "/lib")))
               #t)))))
      (home-page "https://github.com/ekg/smithwaterman")
      (synopsis "Implementation of the Smith-Waterman algorithm")
      (description "Implementation of the Smith-Waterman algorithm.")
      ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
      (license (list license:gpl2 license:expat)))))
15019
;; multichoose, fetched from the upstream git tag "v<version>".
(define-public multichoose
  (package
    (name "multichoose")
    (version "1.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ekg/multichoose/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; Tests require node.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/bin")))
               ;; TODO: There are Python modules for these programs too.
               (for-each (lambda (program)
                           (install-file program destination))
                         '("multichoose" "multipermute")))
             #t)))))
    (home-page "https://github.com/ekg/multichoose")
    (synopsis "Efficient loopless multiset combination generation algorithm")
    (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
    (license license:expat)))
15053
;; fsom is packaged from a pinned upstream commit.
(define-public fsom
  (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
    (package
      (name "fsom")
      (version (git-version "0.0.0" "1" commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/fsom/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Only the "fsom" executable is installed.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "fsom"
                             (string-append (assoc-ref outputs "out")
                                            "/bin"))
               #t)))))
      (home-page "https://github.com/ekg/fsom")
      (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
      (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
      (license license:gpl3))))
15083
;; fastahack, fetched from the upstream git tag "v<version>".
(define-public fastahack
  (package
    (name "fastahack")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ekg/fastahack/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
    (build-system gnu-build-system)
    (arguments
     ;; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Only the "fastahack" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "fastahack"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t)))))
    (home-page "https://github.com/ekg/fastahack")
    (synopsis "Indexing and sequence extraction from FASTA files")
    (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
    (license (list license:expat license:gpl2))))
15115
;; vcflib is built from the release tarball.  The bundled submodule
;; directories are deleted by the source snippet and re-populated at build
;; time from the sources of the corresponding packages.
(define-public vcflib
  (package
    (name "vcflib")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/vcflib/vcflib/releases/"
                           "download/v" version
                           "/vcflib-" version "-src.tar.gz"))
       (sha256
        (base32 "14zzrg8hg8cq9cvq2wdvp21j7nmxxkjrbagw2apd2yqv2kyx42lm"))
       (modules '((guix build utils)))
       (snippet
        `(begin
           (for-each delete-file-recursively
                     '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
                       "libVCFH" "multichoose" "smithwaterman" "tabixpp"))
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (native-inputs
     `(;; Submodules.
       ;; This package builds against the .o files so we need to extract the source.
       ("fastahack-src" ,(package-source fastahack))
       ("filevercmp-src" ,(package-source filevercmp))
       ("fsom-src" ,(package-source fsom))
       ("intervaltree-src" ,(package-source intervaltree))
       ("multichoose-src" ,(package-source multichoose))
       ("smithwaterman-src" ,(package-source smithwaterman))
       ("tabixpp-src" ,(package-source tabixpp))))
    (arguments
     `(#:tests? #f                      ; no tests
       #:make-flags (list (string-append "HTS_LIB="
                                         (assoc-ref %build-inputs "htslib")
                                         "/lib/libhts.a")
                          (string-append "HTS_INCLUDES= -I"
                                         (assoc-ref %build-inputs "htslib")
                                         "/include/htslib")
                          ;; The linker search path must name htslib's "lib"
                          ;; directory rather than its header directory.
                          (string-append "HTS_LDFLAGS= -L"
                                         (assoc-ref %build-inputs "htslib")
                                         "/lib" " -lhts"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'check)
         ;; Populate each submodule directory from the matching "-src"
         ;; input, which is either a directory or a tarball.
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((unpack (lambda (source target)
                             (mkdir target)
                             (with-directory-excursion target
                               (if (file-is-directory? (assoc-ref inputs source))
                                   (copy-recursively (assoc-ref inputs source) ".")
                                   (invoke "tar" "xvf"
                                           (assoc-ref inputs source)
                                           "--strip-components=1"))))))
               (for-each (lambda (submodule)
                           (unpack (string-append submodule "-src") submodule))
                         '("fastahack" "filevercmp" "fsom" "intervaltree"
                           "multichoose" "smithwaterman" "tabixpp"))
               #t)))
         ;; Build tabixpp first, then vcflib itself.
         (replace 'build
           (lambda* (#:key inputs make-flags #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (with-directory-excursion "tabixpp"
                 (substitute* "Makefile"
                   (("-Ihtslib") (string-append "-I" htslib "/include/htslib"))
                   ;; Libraries are searched for in htslib's "lib" directory.
                   (("-Lhtslib") (string-append "-L" htslib "/lib"))
                   (("htslib/htslib") (string-append htslib "/include/htslib")))
                 (invoke "make"
                         (string-append "HTS_LIB=" htslib "/lib/libhts.a")))
               (apply invoke "make" "CC=gcc" "CFLAGS=-Itabixpp" make-flags))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (lib (string-append (assoc-ref outputs "out") "/lib")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files "bin" ".*"))
               ;; The header files in src/ do not interface libvcflib,
               ;; therefore they are left out.
               (install-file "libvcflib.a" lib))
             #t)))))
    (home-page "https://github.com/vcflib/vcflib/")
    (synopsis "Library for parsing and manipulating VCF files")
    (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
    (license license:expat)))
15214
;; freebayes is packaged from a pinned upstream commit.  Its submodule
;; directories are filled in at build time from the sources of the
;; corresponding packages.
(define-public freebayes
  (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")
        (revision "1")
        (version "1.0.2"))
    (package
      (name "freebayes")
      (version (git-version version revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/freebayes.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2"))))
      (build-system gnu-build-system)
      (inputs
       `(("bamtools" ,bamtools)
         ("htslib" ,htslib)
         ("zlib" ,zlib)))
      (native-inputs
       `(;; The following six inputs are needed for running tests.
         ("bc" ,bc)
         ("samtools" ,samtools)
         ("parallel" ,parallel)
         ("perl" ,perl)
         ("procps" ,procps)
         ("python" ,python-2)
         ("vcflib-src" ,(package-source vcflib))
         ;; These are submodules for the vcflib version used in freebayes.
         ;; This package builds against the .o files so we need to extract the source.
         ("tabixpp-src" ,(package-source tabixpp))
         ("smithwaterman-src" ,(package-source smithwaterman))
         ("multichoose-src" ,(package-source multichoose))
         ("fsom-src" ,(package-source fsom))
         ("filevercmp-src" ,(package-source filevercmp))
         ("fastahack-src" ,(package-source fastahack))
         ("intervaltree-src" ,(package-source intervaltree))
         ;; These submodules are needed to run the tests.
         ("bash-tap-src" ,(package-source bash-tap))
         ("test-simple-bash-src"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ingydotnet/test-simple-bash/")
                   (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
             (file-name "test-simple-bash-src-checkout")
             (sha256
              (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
      (arguments
       `(#:make-flags
         (list "CC=gcc"
               (string-append "BAMTOOLS_ROOT="
                              (assoc-ref %build-inputs "bamtools")))
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Rewrite the "grep -P" invocation in the test script as an
           ;; equivalent "grep -E" one.
           (add-after 'unpack 'fix-tests
             (lambda _
               (substitute* "test/t/01_call_variants.t"
                 (("grep -P \"\\(\\\\t500\\$\\|\\\\t11000\\$\\|\\\\t1000\\$\\)\"")
                  "grep -E ' (500|11000|1000)$'"))
               #t))
           ;; Copy or extract every "-src" input into its submodule
           ;; directory.
           (add-after 'unpack 'unpack-submodule-sources
             (lambda* (#:key inputs #:allow-other-keys)
               (define (unpack source target)
                 (let ((location (assoc-ref inputs source)))
                   (with-directory-excursion target
                     (if (file-is-directory? location)
                         (copy-recursively location ".")
                         (invoke "tar" "xvf"
                                 location
                                 "--strip-components=1")))))
               (and
                (unpack "vcflib-src" "vcflib")
                (unpack "fastahack-src" "vcflib/fastahack")
                (unpack "filevercmp-src" "vcflib/filevercmp")
                (unpack "fsom-src" "vcflib/fsom")
                (unpack "intervaltree-src" "vcflib/intervaltree")
                (unpack "multichoose-src" "vcflib/multichoose")
                (unpack "smithwaterman-src" "vcflib/smithwaterman")
                (unpack "tabixpp-src" "vcflib/tabixpp")
                (unpack "test-simple-bash-src" "test/test-simple-bash")
                (unpack "bash-tap-src" "test/bash-tap"))))
           (add-after 'unpack-submodule-sources 'fix-makefiles
             (lambda _
               ;; We don't have the .git folder to get the version tag from.
               (substitute* "vcflib/Makefile"
                 (("^GIT_VERSION.*")
                  (string-append "GIT_VERSION = v" ,version)))
               (substitute* "src/Makefile"
                 (("-I\\$\\(BAMTOOLS_ROOT\\)/src")
                  "-I$(BAMTOOLS_ROOT)/include/bamtools"))
               #t))
           ;; tabixpp and then vcflib are built before the main build phase.
           (add-before 'build 'build-tabixpp-and-vcflib
             (lambda* (#:key inputs make-flags #:allow-other-keys)
               (let ((hts-lib-flag (string-append "HTS_LIB="
                                                  (assoc-ref inputs "htslib")
                                                  "/lib/libhts.a")))
                 (with-directory-excursion "vcflib"
                   (with-directory-excursion "tabixpp"
                     (apply invoke "make" hts-lib-flag make-flags))
                   (apply invoke "make"
                          "CFLAGS=-Itabixpp"
                          "all"
                          make-flags)))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((destination (string-append (assoc-ref outputs "out")
                                                 "/bin")))
                 (install-file "bin/freebayes" destination)
                 (install-file "bin/bamleftalign" destination))
               #t)))))
      (home-page "https://github.com/ekg/freebayes")
      (synopsis "Haplotype-based variant detector")
      (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
      (license license:expat))))
15335
;; samblaster, fetched from the upstream git tag "v.<version>".
(define-public samblaster
  (package
    (name "samblaster")
    (version "0.1.24")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/GregoryFaust/samblaster.git")
             (commit (string-append "v." version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Only the "samblaster" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "samblaster" bin))
             #t)))))
    (home-page "https://github.com/GregoryFaust/samblaster")
    (synopsis "Mark duplicates in paired-end SAM files")
    (description "Samblaster is a fast and flexible program for marking
duplicates in read-id grouped paired-end SAM files. It can also optionally
output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file. When marking
duplicates, samblaster will require approximately 20MB of memory per 1M read
pairs.")
    (license license:expat)))
15369
;; velocyto.R is packaged from a pinned upstream commit.
(define-public r-velocyto
  (let ((revision "1")
        (commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350"))
    (package
      (name "r-velocyto")
      (version (git-version "0.6" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/velocyto-team/velocyto.R.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
      (build-system r-build-system)
      (inputs
       `(("boost" ,boost)))
      (propagated-inputs
       `(("r-hdf5r" ,r-hdf5r)
         ("r-mass" ,r-mass)
         ("r-mgcv" ,r-mgcv)
         ("r-pcamethods" ,r-pcamethods)
         ("r-rcpp" ,r-rcpp)
         ("r-rcpparmadillo" ,r-rcpparmadillo)
         ;; Suggested packages
         ("r-rtsne" ,r-rtsne)
         ("r-cluster" ,r-cluster)
         ("r-abind" ,r-abind)
         ("r-h5" ,r-h5)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-rsamtools" ,r-rsamtools)
         ("r-edger" ,r-edger)
         ("r-igraph" ,r-igraph)))
      (home-page "https://velocyto.org")
      (synopsis "RNA velocity estimation in R")
      (description
       "This package provides basic routines for estimation of gene-specific
transcriptional derivatives and visualization of the resulting velocity
patterns.")
      (license license:gpl3))))
15413
;; MethylDackel, fetched from the upstream git tag named after the version.
(define-public methyldackel
  (package
    (name "methyldackel")
    (version "0.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/MethylDackel.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "10gh8k0ca92kywnrw5pkacq3g6r8s976s12k8jhp8g3g49q9a97g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       ;; The Makefile's "prefix" is pointed at the output's bin/ directory.
       #:make-flags
       (let ((bin-directory (string-append (assoc-ref %outputs "out")
                                           "/bin/")))
         (list "CC=gcc"
               (string-append "prefix=" bin-directory)))
       #:phases
       (modify-phases %standard-phases
         ;; Instead of configuring, patch the Makefile so that $(prefix) is
         ;; created before MethylDackel is installed into it.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("install MethylDackel \\$\\(prefix\\)" install-command)
                (string-append "install -d $(prefix); " install-command)))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    ;; Needed for tests
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/dpryan79/MethylDackel")
    (synopsis "Universal methylation extractor for BS-seq experiments")
    (description
     "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
file containing some form of BS-seq alignments and extract per-base
methylation metrics from them. MethylDackel requires an indexed fasta file
containing the reference genome as well.")
    ;; See https://github.com/dpryan79/MethylDackel/issues/85
    (license license:expat)))
15457
(define-public python-gffutils
  ;; The latest release is more than a year older than the latest commit.
  (let ((revision "1")
        (commit "4034c54600813b1402945e12faa91b3a53162cf1"))
    (package
      (name "python-gffutils")
      (version (git-version "0.9" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/daler/gffutils.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1rwafjdnbir5wnk0ap06ww4lra3p5frhy7mfs03rlldgfnwxymsn"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Make every file matching "\\.gz" in the source tree writable.
           (add-after 'unpack 'make-gz-files-writable
             (lambda _
               (let ((gz-files (find-files "." "\\.gz")))
                 (for-each make-file-writable gz-files))
               #t))
           ;; Run the tests with nose, passing the attribute selector
           ;; "!slow".
           (replace 'check
             (lambda _
               ;; Tests need to access the HOME directory
               (setenv "HOME" "/tmp")
               (invoke "nosetests" "-a" "!slow"))))))
      (propagated-inputs
       `(("python-argcomplete" ,python-argcomplete)
         ("python-argh" ,python-argh)
         ("python-biopython" ,python-biopython)
         ("python-pybedtools" ,python-pybedtools)
         ("python-pyfaidx" ,python-pyfaidx)
         ("python-simplejson" ,python-simplejson)
         ("python-six" ,python-six)))
      (native-inputs
       `(("python-nose" ,python-nose)))
      (home-page "https://github.com/daler/gffutils")
      (synopsis "Tool for manipulation of GFF and GTF files")
      (description
       "python-gffutils is a Python package for working with and manipulating
the GFF and GTF format files typically used for genomic annotations. The
files are loaded into a SQLite database, allowing much more complex
manipulation of hierarchical features (e.g., genes, transcripts, and exons)
than is possible with plain-text methods alone.")
      (license license:expat))))
15508
;; libSBML, built with CMake from the "core" source tarball on SourceForge.
(define-public libsbml
  (package
    (name "libsbml")
    (version "5.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/sbml/libsbml/"
                           version "/stable/libSBML-"
                           version "-core-src.tar.gz"))
       (sha256
        (base32
         "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
    (build-system cmake-build-system)
    (arguments
     `(#:test-target "test"
       ;; Point CMake at the library and headers of the libxml2 input.
       #:configure-flags
       (let ((libxml2-directory (assoc-ref %build-inputs "libxml2")))
         (list "-DWITH_CHECK=ON"
               (string-append "-DLIBXML_LIBRARY="
                              libxml2-directory
                              "/lib/libxml2.so")
               (string-append "-DLIBXML_INCLUDE_DIR="
                              libxml2-directory
                              "/include/libxml2")))))
    (propagated-inputs
     `(("libxml2" ,libxml2)))
    (native-inputs
     `(("check" ,check)
       ("swig" ,swig)))
    (home-page "http://sbml.org/Software/libSBML")
    (synopsis "Process SBML files and data streams")
    (description "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
Markup Language} (SBML) is an interchange format for computer models of
biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
    (license license:lgpl2.1+)))