gnu: Move more packages from python to python-web.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;;
12 ;;; This file is part of GNU Guix.
13 ;;;
14 ;;; GNU Guix is free software; you can redistribute it and/or modify it
15 ;;; under the terms of the GNU General Public License as published by
16 ;;; the Free Software Foundation; either version 3 of the License, or (at
17 ;;; your option) any later version.
18 ;;;
19 ;;; GNU Guix is distributed in the hope that it will be useful, but
20 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;;; GNU General Public License for more details.
23 ;;;
24 ;;; You should have received a copy of the GNU General Public License
25 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
26
27 (define-module (gnu packages bioinformatics)
28 #:use-module ((guix licenses) #:prefix license:)
29 #:use-module (guix packages)
30 #:use-module (guix utils)
31 #:use-module (guix download)
32 #:use-module (guix git-download)
33 #:use-module (guix hg-download)
34 #:use-module (guix build-system ant)
35 #:use-module (guix build-system gnu)
36 #:use-module (guix build-system cmake)
37 #:use-module (guix build-system ocaml)
38 #:use-module (guix build-system perl)
39 #:use-module (guix build-system python)
40 #:use-module (guix build-system r)
41 #:use-module (guix build-system ruby)
42 #:use-module (guix build-system trivial)
43 #:use-module (gnu packages)
44 #:use-module (gnu packages autotools)
45 #:use-module (gnu packages algebra)
46 #:use-module (gnu packages base)
47 #:use-module (gnu packages bash)
48 #:use-module (gnu packages bison)
49 #:use-module (gnu packages boost)
50 #:use-module (gnu packages compression)
51 #:use-module (gnu packages cpio)
52 #:use-module (gnu packages cran)
53 #:use-module (gnu packages curl)
54 #:use-module (gnu packages documentation)
55 #:use-module (gnu packages databases)
56 #:use-module (gnu packages datastructures)
57 #:use-module (gnu packages file)
58 #:use-module (gnu packages flex)
59 #:use-module (gnu packages gawk)
60 #:use-module (gnu packages gcc)
61 #:use-module (gnu packages gd)
62 #:use-module (gnu packages gtk)
63 #:use-module (gnu packages glib)
64 #:use-module (gnu packages graph)
65 #:use-module (gnu packages groff)
66 #:use-module (gnu packages guile)
67 #:use-module (gnu packages haskell)
68 #:use-module (gnu packages image)
69 #:use-module (gnu packages imagemagick)
70 #:use-module (gnu packages java)
71 #:use-module (gnu packages ldc)
72 #:use-module (gnu packages linux)
73 #:use-module (gnu packages logging)
74 #:use-module (gnu packages machine-learning)
75 #:use-module (gnu packages man)
76 #:use-module (gnu packages maths)
77 #:use-module (gnu packages mpi)
78 #:use-module (gnu packages ncurses)
79 #:use-module (gnu packages ocaml)
80 #:use-module (gnu packages pcre)
81 #:use-module (gnu packages parallel)
82 #:use-module (gnu packages pdf)
83 #:use-module (gnu packages perl)
84 #:use-module (gnu packages perl-check)
85 #:use-module (gnu packages pkg-config)
86 #:use-module (gnu packages popt)
87 #:use-module (gnu packages protobuf)
88 #:use-module (gnu packages python)
89 #:use-module (gnu packages python-web)
90 #:use-module (gnu packages readline)
91 #:use-module (gnu packages ruby)
92 #:use-module (gnu packages serialization)
93 #:use-module (gnu packages shells)
94 #:use-module (gnu packages statistics)
95 #:use-module (gnu packages swig)
96 #:use-module (gnu packages tbb)
97 #:use-module (gnu packages tex)
98 #:use-module (gnu packages texinfo)
99 #:use-module (gnu packages textutils)
100 #:use-module (gnu packages time)
101 #:use-module (gnu packages tls)
102 #:use-module (gnu packages vim)
103 #:use-module (gnu packages web)
104 #:use-module (gnu packages xml)
105 #:use-module (gnu packages xorg)
106 #:use-module (srfi srfi-1)
107 #:use-module (ice-9 match))
108
(define-public r-ape
  ;; R package "ape", fetched from CRAN via 'cran-uri'.
  (package
    (name "r-ape")
    (version "5.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "ape" version))
              (sha256
               (base32
                "0q59pmxawz498cb9mv5m49lhiwxib8ak94yyydz7qg8b6lpd4bn3"))))
    (build-system r-build-system)
    ;; R dependencies are propagated rather than plain inputs.
    (propagated-inputs
     `(("r-lattice" ,r-lattice)
       ("r-nlme" ,r-nlme)
       ("r-rcpp" ,r-rcpp)))
    (synopsis "Analyses of phylogenetics and evolution")
    (description
     "This package provides functions for reading, writing, plotting, and
manipulating phylogenetic trees, analyses of comparative data in a
phylogenetic framework, ancestral character analyses, analyses of
diversification and macroevolution, computing distances from DNA sequences,
and several other tools.")
    (home-page "http://ape-package.ird.fr/")
    (license license:gpl2+)))
134
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; No configure step is run; the program is compiled directly with
         ;; gcc in the replacement 'build' phase below.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The C source file carries the version in its name.
             (let ((source-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc"
                               "-O3" "-ffast-math" "-finline-functions"
                               "-o" "aragorn"
                               source-file)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (bindir (string-append output "/bin"))
                    (mandir (string-append output "/share/man/man1")))
               ;; 'install-file' creates the target directory when needed.
               (install-file "aragorn" bindir)
               (install-file "aragorn.1" mandir)
               #t))))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
180
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method url-fetch)
              ;; BamM is not available on pypi.
              (uri (string-append
                    "https://github.com/Ecogenomics/BamM/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts. Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (substitute* "configure" (("/bin/sh") sh))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Wrap the 'bamm' executable so that it sees the build-time
             ;; PATH at run time.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (python (assoc-ref inputs "python"))
                    ;; Derive "MAJOR.MINOR" from the version that follows the
                    ;; last dash of the Python store file name.  Taking a
                    ;; fixed number of trailing characters breaks as soon as
                    ;; the patch level has two digits (e.g. "2.7.13").
                    (python-version
                     (substring python (+ 1 (string-rindex python #\-))))
                    (components (string-split python-version #\.))
                    (major+minor (string-append (car components) "."
                                                (cadr components))))
               ;; Make the freshly installed executable and module visible
               ;; to the test suite.
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               (setenv "PYTHONPATH"
                       (string-append out "/lib/python" major+minor
                                      "/site-packages:"
                                      (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (zero? (system* "nosetests"))))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)  ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
272
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.4.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0jr024kcrhjb82cm69i7p5fcg5375zlc1h3qh2n1v368hcd0qflk"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Pass an rpath pointing at this output's "lib/bamtools"
             ;; directory to the linker.
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' returns an unspecified value; report success
             ;; explicitly like the other phases in this file.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
305
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.5")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/bcftools/releases/download/"
                    version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "0093hkkvxmbwfaa7905s6185jymynvg42kq6sxv7fili11l5mxwz"))
              (patches (search-patches "bcftools-regidx-unsigned-char.patch"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "htslib-1.5")
                  ;; Return a true value explicitly, as the other snippets
                  ;; in this file do.
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags (list "--with-htslib=system")
       ;; Point the Makefile at the htslib input instead of the bundled copy
       ;; that the source snippet removes.
       #:make-flags
       (list
        "USE_GPL=1"
        "LIBS=-lgsl -lgslcblas"
        (string-append "prefix=" (assoc-ref %outputs "out"))
        (string-append "HTSDIR=" (assoc-ref %build-inputs "htslib") "/include")
        (string-append "HTSLIB=" (assoc-ref %build-inputs "htslib") "/lib/libhts.so")
        (string-append "BGZIP=" (assoc-ref %build-inputs "htslib") "/bin/bgzip")
        (string-append "TABIX=" (assoc-ref %build-inputs "htslib") "/bin/tabix")
        (string-append "PACKAGE_VERSION=" ,version))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; The test driver hard-codes /bin/bash; use the bash found in
             ;; PATH instead.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
358
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2. We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; The phase succeeds only if every tarball was unpacked; the
             ;; Makefile edits are skipped when unpacking fails.
             (and
              ;; Unpack the tarballs to benefit from shebang patching.
              (with-directory-excursion "third-party"
                (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                     (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                     (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
              (begin
                ;; Disable unpacking of tarballs in Makefile.
                (substitute* "system.mk/Makefile.linux"
                  (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                  (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                (substitute* "third-party/zlib-1.2.7/Makefile.in"
                  (("^SHELL=.*$") "SHELL=bash\n"))
                #t))))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
417
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every file found under the build tree's "bin" directory
             ;; into the output's "bin" directory.
             (let* ((output (assoc-ref outputs "out"))
                    (target (string-append output "/bin/"))
                    (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs)
               #t))))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
456
457 ;; Later releases of bedtools produce files with more columns than
458 ;; what Ribotaper expects.
(define-public bedtools-2.18
  ;; Same recipe as 'bedtools'; only the version and the source differ.
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
471
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     ;; Uses the pinned 'bedtools-2.18' and 'samtools-0.1' variants rather
     ;; than the current releases.
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (license license:gpl3+)))
501
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ratschlab/RiboDiff/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Append a "scripts=" entry right after the "packages=" line,
             ;; reusing that line's leading text as indentation.
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole-line indent)
                (string-append whole-line "\n"
                               indent "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
545
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; 'copy-file' does not create missing directories, so the
               ;; manual directory has to exist first.
               (mkdir-p man)
               ;; Install the manual page under the program's own name.
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin)
               ;; 'install-file' returns an unspecified value; report
               ;; success explicitly like the other phases in this file.
               #t))))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
586
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Use 'pypi-uri' like the other PyPI-hosted packages in this
              ;; file instead of spelling out the download URL by hand.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    ;; The command-line tools being wrapped are propagated along with the
    ;; Python module.
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
618
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.6")
    (source (origin
              (method url-fetch)
              ;; Use GitHub as source because PyPI distribution does not
              ;; contain test data:
              ;; https://github.com/biocore/biom-format/issues/693
              (uri (string-append
                    "https://github.com/biocore/biom-format/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "08cr7wpahk6zb31h4bs7jmzpvxcqv9s13xz40h6y2h656jvdvnpj"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The Python 2 variant is a separate definition; the promise defers the
    ;; reference to it.
    (properties `((python2-variant . ,(delay python2-biom-format))))))
653
(define-public python2-biom-format
  ;; Python 2 variant of 'python-biom-format' with one extra build phase
  ;; placed in front of the arguments of the converted package.
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-base)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ,@(package-arguments py2-base))))))
668
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         ;; Package names of the transitive target inputs of the packages
         ;; listed above, without duplicates.
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (concatenate
                 (map (compose package-transitive-target-inputs cadr)
                      perl-inputs))))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; The release tag uses dashes where the version uses dots.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (c)
                                           (if (char=? c #\.)
                                               #\- c)) version)
                             ".tar.gz"))
         (sha256
          (base32
           "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime. As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out-dir (assoc-ref outputs "out"))
                      (scripts-dir (string-append out-dir "/bin/"))
                      (perl5lib
                       (string-join
                        (cons (string-append out-dir "/lib/perl5/site_perl")
                              (map (lambda (input-name)
                                     (assoc-ref %build-inputs input-name))
                                   ',transitive-input-names))
                        ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files scripts-dir "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
733
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
764
;; Python 2 variant, derived from 'python-biopython' by 'package-with-python2'.
(define-public python2-biopython (package-with-python2 python-biopython))
767
768 ;; An outdated version of biopython is required for seqmagick, see
769 ;; https://github.com/fhcrc/seqmagick/issues/59
770 ;; When that issue has been resolved this package should be removed.
(define python2-biopython-1.66
  ;; Not exported: defined with plain 'define', so it is only visible inside
  ;; this module.  Everything except the version and the source is inherited.
  (package
    (inherit python2-biopython)
    (version "1.66")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
781
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from
  ;; source is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((git-commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      ;; The version embeds the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take git-commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit git-commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (inputs
       ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of the
       ;; bpp packages with GCC 5.
       `(("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
814
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from
  ;; source is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((git-commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
    (package
      (name "bpp-phyl")
      ;; The version embeds the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take git-commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit git-commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build
         ;; directory so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ;; GCC 4.8 fails due to an 'internal compiler error', so we use a
         ;; more modern GCC.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
851
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from
  ;; source is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((git-commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
    (package
      (name "bpp-popgen")
      ;; The version embeds the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take git-commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit git-commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
884
(define-public bpp-seq
  ;; No release has been made upstream since 2014, and the recommended way to
  ;; install from source is a clone of the git repository, hence the pinned
  ;; commit.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
         (short-id (string-take commit 7)))
    (package
      (name "bpp-seq")
      (version (string-append "2.2.0-1." short-id))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       ;; Build inside the source tree: with an out-of-source build the test
       ;; data is not copied into the build directory and the tests fail.
       '(#:out-of-source? #f
         #:parallel-build? #f))
      (inputs
       ;; GCC 5 is used here as it is for 'bpp-core'.
       `(("bpp-core" ,bpp-core)
         ("gcc" ,gcc-5)))
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (home-page "http://biopp.univ-montp2.fr")
      (license license:cecill-c))))
918
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Each input needs its own label: a label is the key used to look an
       ;; input up, so reusing "bpp-phyl" for bpp-popgen made the second entry
       ;; unreachable by name.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
957
(define-public blast+
  (package
    (name "blast+")
    (version "2.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "15n937pw5aqmyfjb6l387d18grqbb96l63d5xj4l7yyh0zbf2405"))
       (patches (search-patches "blast+-fix-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copies of bzip2, zlib and pcre, and stop the
           ;; compression Makefile from descending into the removed
           ;; directories.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; The msbuild directory is of no use here.
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; This massive library has only two(!) tests, and both of them fail
       ;; with "unparsable timing stats":
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; At some point the configure phase requires $HOME to be set.
         (add-before 'configure 'set-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Map a bare command name to its replacement text: a fixed
             ;; invocation for "date" (to keep the build deterministic), the
             ;; absolute path found by 'which' otherwise, or #f with a
             ;; warning when the command cannot be found.
             (define (resolve-tool cmd)
               (if (string=? cmd "date")
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Files of the build system that hardcode tool locations.
             (define build-system-files
               '("src/build-system/configure.ac"
                 "src/build-system/configure"
                 "src/build-system/helpers/run_with_lock.c"
                 "scripts/common/impl/if_diff.sh"
                 "scripts/common/impl/run_with_lock.sh"
                 "src/build-system/Makefile.configurables.real"
                 "src/build-system/Makefile.in.top"
                 "src/build-system/Makefile.meta.gmake=no"
                 "src/build-system/Makefile.meta.in"
                 "src/build-system/Makefile.meta_l"
                 "src/build-system/Makefile.meta_p"
                 "src/build-system/Makefile.meta_r"
                 "src/build-system/Makefile.mk.in"
                 "src/build-system/Makefile.requirements"
                 "src/build-system/Makefile.rules_with_autodep.in"))

             ;; Replace hardcoded /bin and /usr/bin tool paths; when a tool
             ;; cannot be resolved the original text is kept.
             (substitute* (append build-system-files
                                  (find-files "scripts/common/check"
                                              "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                (or (resolve-tool cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S")
                (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; The check script refers to "/var/tmp"; use "/tmp" instead.
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; Keep the helper scripts from resetting PATH.
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install', so it is invoked by hand.
             (let* ((out     (assoc-ref outputs "out"))
                    (lib     (string-append (assoc-ref outputs "lib")
                                            "/lib"))
                    (include (string-append (assoc-ref outputs "include")
                                            "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" out)
                           (string-append "--libdir=" lib)
                           (string-append "--includedir=" include)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           (string-append "--with-pcre="
                                          (assoc-ref inputs "pcre"))
                           ;; By default every library is built twice,
                           ;; once with "-static" in its name and once
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               (zero? (apply system* "./configure.orig" flags))))))))
    (outputs '("out"                    ;  21 MB
               "lib"                    ; 226 MB
               "include"))              ;  33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    (home-page "http://blast.ncbi.nlm.nih.gov")
    ;; The bulk of the sources is in the public domain.  The exceptions are:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1111
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz sources are deleted by the source snippet, so
         ;; the Makefile must not try to build them.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Point bless at the kmc binary installed by this package and at
         ;; the pigz binary from the "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Install the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1196
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0hwa5r9qbglppb7sz5z79rlmmddr3n51n468jb3wh8rwjgn3yr90"))
              (modules '((guix build utils)))
              (snippet
               ;; End the snippet with an explicit #t, like the other
               ;; multi-step snippets in this file, rather than relying on
               ;; the return value of 'substitute*'.
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)
       ("python" ,python-2)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the Perl test script against the freshly built binaries in
         ;; the build directory.
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1250
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32
         "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
       (patches (search-patches "tophat-build-with-later-seqan.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copies of SeqAn and samtools.
           (for-each delete-file-recursively
                     '("src/SeqAn-1.3"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Strip every reference to the bundled samtools build from the
             ;; Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Call the samtools executable found in PATH instead of the
             ;; bundled one.
             (let ((samtools (which "samtools")))
               (substitute* '("src/common.cpp"
                              "src/tophat.py")
                 (("samtools_0.1.18") samtools)))
             ;; Include the samtools headers from their "samtools/"
             ;; subdirectory, for both the quoted and the bracketed form.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    ;; The license of TopHat is the Boost Software License, Version 1.0; see
    ;; https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1317
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32
         "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Install the executable, the README and the manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    ;; The code relies on non-portable SSE instructions, so it only builds
    ;; on x86_64.
    (supported-systems '("x86_64-linux"))
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (home-page "http://bio-bwa.sourceforge.net/")
    (license license:gpl3+)))
1366
(define-public bwa-pssm
  ;; Inherits from 'bwa'; only the fields listed below are overridden.
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1395
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "bx-python" version))
              (sha256
               (base32
                "15z2w3bvnc0n4qmb9bd6d8ylc2h2nj883x2w9iixf4x3vki9b22i"))
              (modules '((guix build utils)))
              (snippet
               ;; End the snippet with an explicit #t, like the other
               ;; multi-step snippets in this file, rather than relying on
               ;; the return value of 'substitute*'.
               '(begin
                  (substitute* "setup.py"
                    ;; remove dependency on outdated "distribute" module
                    (("^from distribute_setup import use_setuptools") "")
                    (("^use_setuptools\\(\\)") ""))
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f             ;tests fail because test data are not included
       #:python ,python-2))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1427
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.11.2.2")
    (source (origin
              (method url-fetch)
              ;; Test data is missing on PyPi.
              (uri (string-append
                    "https://github.com/pysam-developers/pysam/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1cfqdxsqs3xhacns9n0271ck6wkc76px66ddjm91wfw2jxxfklvc"))
              (modules '((guix build utils)))
              (snippet
               ;; End the snippet with an explicit #t, like the other
               ;; multi-step snippets in this file.
               '(begin
                  ;; Drop bundled htslib. TODO: Also remove samtools and
                  ;; bcftools.
                  (delete-file-recursively "htslib")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         ;; Make the build use the external htslib from the inputs and link
         ;; against ncurses.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "HTSLIB_MODE" "external")
             (setenv "HTSLIB_LIBRARY_DIR"
                     (string-append (assoc-ref inputs "htslib") "/lib"))
             (setenv "HTSLIB_INCLUDE_DIR"
                     (string-append (assoc-ref inputs "htslib") "/include"))
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             #t))
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Add first subdirectory of "build" directory to PYTHONPATH.
             (setenv "PYTHONPATH"
                     (string-append
                      (getenv "PYTHONPATH")
                      ":" (getcwd) "/build/"
                      (car (scandir "build"
                                    (negate (cut string-prefix? "." <>))))))
             ;; Step out of source dir so python does not import from CWD.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               (and (zero? (system* "make" "-C" "pysam_data"))
                    (zero? (system* "make" "-C" "cbcf_data"))
                    ;; Running nosetests without explicitly asking for a
                    ;; single process leads to a crash.  Running with multiple
                    ;; processes fails because the tests are not designed to
                    ;; run in parallel.

                    ;; FIXME: tests keep timing out on some systems.
                    ;; (zero? (system* "nosetests" "-v"
                    ;;                 "--processes" "1"))
                    )))))))
    (propagated-inputs
     `(("htslib" ,htslib)))    ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; Dependencies below are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1505
;; Python 2 variant of 'python-pysam', derived with 'package-with-python2'.
(define-public python2-pysam (package-with-python2 python-pysam))
1508
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "twobitreader" version))
       (sha256
        (base32
         "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
    (build-system python-build-system)
    (arguments
     ;; The PyPi release does not ship the tests.
     ;; TODO: Build from the Git repository instead, or ask the upstream
     ;; maintainer to include the tests in the PyPi release.
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (home-page "https://github.com/benjschiller/twobitreader")
    (license license:artistic2.0)))
1533
;; Python 2 variant of 'python-twobitreader', derived with
;; 'package-with-python2'.
(define-public python2-twobitreader (package-with-python2 python-twobitreader))
1536
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32
         "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    (arguments
     ;; The tests are disabled because some of the files they need are not
     ;; included.
     '(#:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (home-page "https://github.com/joshuagryphon/plastid")
    (license license:bsd-3)))
1569
;; Python 2 variant of 'python-plastid', derived with 'package-with-python2'.
(define-public python2-plastid (package-with-python2 python-plastid))
1572
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32
         "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; there are no tests
       ;; The Makefile copies the executables straight into PREFIX, so
       ;; PREFIX has to be the "bin" directory itself.
       #:make-flags
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Strip build dates and times from the sources; they are a source
         ;; of non-determinism.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The target directory has to exist before the "install" target
         ;; runs, because that target does not create it.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))
               #t))))))
    (inputs
     `(("perl" ,perl)))
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    (home-page "http://weizhongli-lab.org/cd-hit/")
    ;; Quoting the manual: "It can be copied under the GNU General Public
    ;; License version 2 (GPLv2)."
    (license license:gpl2)))
1621
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The setup-time dependency declared here is not needed.
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           ;; Delete the shipped shared objects and the "dist" directory.
           (delete-file "clipper/src/peaks.so")
           (delete-file "clipper/src/readsToWiggle.so")
           (delete-file-recursively "dist/")
           #t))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("htseq" ,python2-htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    ;; All of the native inputs are needed for the tests only.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (home-page "https://github.com/YeoLab/clipper")
    (license license:gpl2)))
1667
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the documentation, the "QuarryFiles" data directory and
         ;; the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Return #t explicitly, like the other phases in this file,
               ;; instead of the unspecified value of 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
1707
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (bin-dir (string-append prefix "/bin")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir bin-dir)
               ;; Extend the module lookup path of the "couger" script with
               ;; the output directory, which holds the "src" directory.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (install-file "couger" bin-dir)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Wrap 'couger' so that it runs with the PYTHONPATH of the
             ;; build environment.
             (let ((script      (string-append (assoc-ref outputs "out")
                                               "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program script
                 (list "PYTHONPATH" ":" 'prefix (list python-path)))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (home-page "http://couger.oit.duke.edu")
    (license license:gpl3+)))
1778
;; Clustal-Omega multiple sequence aligner, built with the default phases of
;; the GNU build system.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (home-page "http://www.clustal.org/omega/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
1801
;; CrossMap converts genome coordinates between assemblies.  The bundled
;; pysam copy is removed from the source and the build is told (through the
;; CROSSMAP_USE_SYSTEM_PYSAM environment variable) to use the packaged one.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
              ;; This patch has been sent upstream already and is available
              ;; for download from Sourceforge, but it has not been merged.
              (patches (search-patches "crossmap-allow-system-pysam.patch"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove the bundled copy of pysam.
                  (delete-file-recursively "lib/pysam")
                  ;; 'delete-file-recursively' returns an unspecified value;
                  ;; report success explicitly.
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The variable is set right after unpacking so that it is visible
         ;; to all later phases.
         (add-after 'unpack 'set-env
           (lambda _ (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1") #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1841
;; Cutadapt removes adapter sequences from sequencing reads.  Its test suite
;; is run against the installed package rather than the source tree.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.14")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "16gbpiwy4m48vq2h5wqar3i8vr6vcj9gcl2qvqim19x6ya9dp8kd"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests must be run after installation.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Put the freshly installed site-packages directory on
             ;; PYTHONPATH.  The helper computes the Python version itself,
             ;; instead of slicing fixed character counts out of the
             ;; interpreter's store file name, which only works for versions
             ;; of the exact shape "X.Y.Z".
             (add-installed-pythonpath inputs outputs)
             (zero? (system* "nosetests" "-P" "tests")))))))
    (inputs
     `(("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1883
;; libBigWig: C library for bigWig files.  Built straight from its Makefile
;; (no configure script); the test line matching "testRemote" is removed
;; from the Makefile before the check phase.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (home-page "https://github.com/dpryan79/libBigWig")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("doxygen" ,doxygen)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (arguments
     `(#:test-target "test"
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'disable-curl-test
           (lambda _
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
         ;; there has not yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (subdir)
                           (mkdir-p (string-append prefix subdir)))
                         '("/lib" "/include"))
               #t))))))
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
1929
;; Python bindings for libBigWig.  The bundled libBigWig sources are deleted
;; and setup.py is patched to link against the packaged library instead.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyBigWig" version))
              (sha256
               (base32
                "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled libBigWig sources
                  (delete-file-recursively "libBigWig")
                  ;; 'delete-file-recursively' returns an unspecified value;
                  ;; report success explicitly.
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           ;; Prepend "BigWig" to the list of libraries in setup.py.
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
1964
;; Python 2 variant of python-pybigwig, derived with 'package-with-python2'.
(define-public python2-pybigwig
  (package-with-python2
   python-pybigwig))
1967
;; DendroPy phylogenetics library.  The Python 2 variant is a separate,
;; customized package, referenced lazily through the 'python2-variant'
;; property.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.2.0")
    (home-page "http://packages.python.org/DendroPy/")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "DendroPy" version))
       (patches (search-patches "python-dendropy-fix-tests.patch"))
       (sha256
        (base32 "15c7s3d5gf19ljsxvq5advaa752wfi7pwrdjyhzmg85hccyvp47p"))))
    (build-system python-build-system)
    (properties `((python2-variant . ,(delay python2-dendropy))))
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
1989
;; Python 2 build of DendroPy.  It inherits from the converted Python 3
;; package but runs its tests through nosetests.
(define-public python2-dendropy
  (let ((parent (package-with-python2
                 (strip-python2-variant python-dendropy))))
    (package
      (inherit parent)
      (native-inputs
       `(("python2-nose" ,python2-nose)
         ,@(package-native-inputs parent)))
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; There is currently a test failure that only happens on some
           ;; systems, and only using "setup.py test"
           (replace 'check
             (lambda _
               (zero? (system* "nosetests"))))))))))
2004
;; py2bit: Python bindings for lib2bit, fetched from PyPI and built with the
;; default python-build-system phases.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.2.1")
    (home-page "https://github.com/dpryan79/py2bit")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "py2bit" version))
       (sha256
        (base32 "1cdf4qlmgwsh1f4k0wdv2sr8x9qn4366p0k3614vbd0fpqiarxrl"))))
    (build-system python-build-system)
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2023
;; deepTools: processing and visualization of deep-sequencing data.  Built
;; with the default python-build-system phases.
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.5.1")
    (home-page "https://github.com/fidelram/deepTools")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1q8i12l2gvk4n2s8lhyzwhh9g4qbc8lrk5l7maz00yvd5g6z5540"))))
    (build-system python-build-system)
    ;; Everything listed here is needed only by the test suite.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-pytz" ,python-pytz)))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)))
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2061
;; DIAMOND protein aligner.  "-march=native" is stripped from CMakeLists.txt
;; right after unpacking.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.13")
    (home-page "https://github.com/bbuchfink/diamond")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1pi5ncqwmynqpmmp3j3lhnqrjhj34sr6wpmsgrpkv3wyxx22fv86"))))
    (build-system cmake-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
2095
;; Discrover motif discovery tool.  Two headers are patched after unpacking
;; to include <random> right after their include-guard definitions.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maaskola/discrover/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'add-missing-includes
           (lambda _
             ;; Each entry pairs a header file with the include-guard line
             ;; after which "#include <random>" is inserted.
             (for-each
              (lambda (file+guard)
                (substitute* (car file+guard)
                  (((cdr file+guard) line)
                   (string-append line "\n#include <random>"))))
              '(("src/executioninformation.hpp"
                 . "#define EXECUTIONINFORMATION_HPP")
                ("src/plasma/fasta.hpp"
                 . "#define FASTA_HPP")))
             #t)))))
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
2134
;; EIGENSOFT population genetics tools, built from a pinned git commit.  The
;; pre-built binaries shipped in "bin" are removed from the source and the
;; freshly built ones are copied to the output by an extra phase.
(define-public eigensoft
  (let ((commit "b14d1e202e21e532536ff8004f0419cd5e259dc7")
        (revision "1"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-" revision "." (string-take commit 9)))
      (home-page "https://github.com/DReichLab/EIG")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32
           "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Remove pre-built binaries.
         (snippet
          '(begin
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program bindir))
                           (find-files "../bin" ".*")))
               #t)))))
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
2202
;; Entrez Direct command-line tools.  Only the 'edirect.pl' script is
;; installed; it is wrapped so that it sees the build-time PERL5LIB.
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                                  "versions/2016-05-03/edirect.tar.gz"))
              (sha256
               (base32
                "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path))))
             ;; Report success explicitly instead of relying on the return
             ;; value of 'wrap-program'.
             #t)))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2267
;; Exonerate pairwise sequence alignment tool, built with the GNU build
;; system with parallel builds disabled.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2297
;; eXpress streaming quantification tool.  The CMake files are patched to
;; use shared Boost libraries and to look for bamtools headers and libraries
;; in the "bamtools" input instead of a bundled directory.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32 "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools"))))
             #t)))))
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2341
;; Express Beta Diversity.  The build and the check run inside the "source"
;; sub-directory; installation copies the binary and a helper script by hand.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python-2)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; The check phase invokes the freshly built binary with "-u".
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (file)
                           (install-file file bindir))
                         '("scripts/convertToEBD.py"
                           "bin/ExpressBetaDiversity"))
               #t))))))
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2384
;; FastTree.  The source is a single C file, so there is nothing to unpack
;; or configure; it is compiled twice, once plain and once with OpenMP.
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (home-page "http://www.microbesonline.org/fasttree")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32 "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into PROGRAM; LEADING-FLAGS are placed before
             ;; the flags shared by both variants.
             (define (compile program . leading-flags)
               (zero? (apply system* "gcc"
                             (append leading-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o" program
                                           source
                                           "-lm")))))
             (and (compile "FastTree")
                  (compile "FastTreeMP" "-DOPENMP" "-fopenmp"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2441
;; FASTX-Toolkit, built with the default phases of the GNU build system.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
2473
;; Flexbar barcode and adapter removal tool.  FLEXBAR_BINARY_DIR is pointed
;; at the output's "bin" directory at configure time, so the standard
;; install phase is deleted; the check phase runs the upstream validation
;; script with that directory on PATH.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; Derive the unpacked source directory name from the package
             ;; version so that a version bump does not silently leave a
             ;; stale path here.
             (chdir ,(string-append "../flexbar_v" version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2518
;; FragGeneScan gene predictor.  The Perl driver and the C source are patched
;; to use absolute tool paths and to look for training files under
;; share/fraggenescan/train; an example run after installation stands in for
;; a test suite.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Note the trailing slash: the replacements below
                    ;; append "train/" directly.
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  ;; SHARE already ends with "/", so do not add another one.
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _ (and (zero? (system* "make" "clean"))
                          (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share)
               ;; 'copy-recursively' returns an unspecified value; report
               ;; success explicitly.
               #t)))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check the
           ;; output files gets created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               (and (zero? (system* frag ; Test complete genome.
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out")
                    (zero? (system* ; Test incomplete sequences.
                            frag
                            "-genome=./example/NC_000913-fgs.ffn"
                            "-out=out"
                            "-complete=0"
                            "-train=454_30")))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2602
;; fxtract sequence extraction tool.  The release tarball ships an empty
;; "util" directory, which is replaced at build time by a pinned checkout of
;; ctSkennerton's util repository.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the tarball after this package and version.  It was
         ;; previously named like the "util" checkout, which is a
         ;; different source.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               ;; 'rmdir' only succeeds on an empty directory, so this also
               ;; guards against overwriting bundled files.
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2669
(define-public gemma
  (package
    (name "gemma")
    (version "0.96")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/xiangzhou/GEMMA/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "055ynn16gd12pf78n4vr2a9jlwsbwzajpdnf2y2yilg1krfff222"))
              (patches (search-patches "gemma-intel-compat.patch"))))
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("zlib" ,zlib)))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ;; FORCE_DYNAMIC=1 is passed everywhere; the remaining flag depends on
       ;; the architecture being built *for*.  Look at the cross-compilation
       ;; target first and fall back to the native system, so that a
       ;; cross-build does not inherit the build machine's Intel flags.
       '("FORCE_DYNAMIC=1"
         ,@(let ((system (or (%current-target-system)
                             (%current-system))))
             (cond ((string-prefix? "x86_64" system)
                    '())
                   ((string-prefix? "i686" system)
                    '("FORCE_32BIT=1"))
                   (else
                    '("NO_INTEL_COMPAT=1")))))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure)
         ;; Create the "bin" directory that 'install later copies the
         ;; "bin/gemma" executable from.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (install-file "bin/gemma"
                             (string-append
                              out "/bin")))
             #t)))
       #:tests? #f)) ; no tests included yet
    (home-page "https://github.com/xiangzhou/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (license license:gpl3)))
2719
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((numpy-headers
                    (string-append
                     (assoc-ref inputs "python-numpy")
                     "/lib/python2.7/site-packages/numpy/core/include/")))
               ;; Remove the pre-generated C files so that they are
               ;; regenerated from the .pyx sources.
               (for-each delete-file
                         '("grit/sparsify_support_fns.c"
                           "grit/call_peaks_support_fns.c"))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Compilation needs the numpy headers on the include path.
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-headers
                                 "']")))
               #t))))))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2769
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32 "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags
       '("allall"
         ;; `popcnt' instructions are only supported on x86_64; turn them
         ;; off everywhere else.
         ,@(if (not (string-prefix? "x86_64"
                                    (or (%current-target-system)
                                        (%current-system))))
               '("POPCNT_CAPABILITY=0")
               '()))
       #:phases
       (modify-phases %standard-phases
         ;; The build is driven by a plain Makefile.
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; Pin BUILD_HOST and BUILD_TIME to constants so that the
               ;; build is deterministic.
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/"))
                    (programs
                     (find-files
                      "."
                      "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs))
             #t)))))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2836
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       ;; FIXME: a better source URL is
       ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
       ;;                "/downloads/hisat2-" version "-source.zip")
       ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
       ;; but it is currently unavailable.
       (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         ;; The build is driven by a plain Makefile.
         (delete 'configure)
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Replace the `date` invocation in the Makefile with a constant.
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/"))
                    (docdir (string-append out "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir))
             #t)))))
    (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later.  The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
2899
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is named after the first component of the
       ;; version string, hence the 'version-prefix' call.
       (uri (string-append "http://eddylab.org/software/hmmer"
                           (version-prefix version 1) "/"
                           version "/hmmer-" version ".tar.gz"))
       (patches (search-patches "hmmer-remove-cpu-specificity.patch"))
       (sha256
        (base32 "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (home-page "http://hmmer.org/")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
2930
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32 "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Users importing htseq as a Python library need numpy as well, so it
    ;; has to be propagated.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (license license:gpl3+)))
2956
;; Python 2 variant, derived from the 'htseq' package.
(define-public python2-htseq (package-with-python2 htseq))
2959
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; Remove the build dependency on git by flipping
              ;; "failifexecutionfails" to false in build.xml.
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Internet access
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
2997
(define-public htslib
  (package
    (name "htslib")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "0bcjmnbwp2bib1z1bkrp95w9v2syzdwdfqww10mkb1hxlmg52ax0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openssl" ,openssl)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; Point the test driver at the bash found in PATH instead of
             ;; the hard-coded /bin/bash.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    (home-page "http://www.htslib.org")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
3034
;; Older htslib release, kept private to this module.  Remove it as soon as
;; nothing depends on it any more.
(define htslib-1.3
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
3048
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    ;; FIXME: The tests are disabled because they fail with
    ;; "ImportError: No module named 'utility'".
    (arguments
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (home-page "https://github.com/nboley/idr")
    (license license:gpl3+)))
3079
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (arguments
     `(#:configure-flags
       ;; Each language binding is installed into the output of the same
       ;; name: "--enable-ruby-binding=<ruby output>", then the same for
       ;; python.
       (map (lambda (language)
              (string-append "--enable-" language "-binding="
                             (assoc-ref %outputs language)))
            '("ruby" "python"))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; The tests are run through generator_manager.hpp, which uses
             ;; $SHELL when set and /bin/sh otherwise.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; From their website: JELLYFISH runs on 64-bit Intel-compatible processors
    (supported-systems '("x86_64-linux"))
    ;; The combined work is published under the GPLv3 or later.  Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
3130
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (patches (search-patches "khmer-use-libraries.patch"))
       (sha256
        (base32 "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))))
    (build-system python-build-system)
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)
       ;; Tests fail when gcc-5 is used for compilation.  Use gcc-4.9 at least
       ;; until the next version of khmer (likely 2.1) is released.
       ("gcc" ,gcc-4.9)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Drop the bundled copies of zlib and bzip2.
             (for-each delete-file-recursively
                       '("third-party/zlib"
                         "third-party/bzip2"))
             ;; Swap the bundled seqan for the headers of the seqan input.
             (let* ((bundled-seqan "third-party/seqan")
                    (header-dir (string-append bundled-seqan
                                               "/core/include/seqan"))
                    (system-seqan (string-append (assoc-ref inputs "seqan")
                                                 "/include/seqan")))
               (delete-file-recursively bundled-seqan)
               (copy-recursively system-seqan header-dir))
             ;; We do not replace the bundled MurmurHash as the canonical
             ;; repository for this code 'SMHasher' is unsuitable for
             ;; providing a library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; It is simpler to test after installation.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Characters 1-3 of the last five characters of the
                    ;; python store file name; presumably "X.Y" out of a
                    ;; trailing "X.Y.Z" -- NOTE(review): this assumes
                    ;; single-digit version components.
                    (python-version
                     (string-take
                      (string-take-right (assoc-ref inputs "python") 5)
                      3))
                    (site-packages
                     (string-append out "/lib/python" python-version
                                    "/site-packages")))
               ;; Make the freshly installed programs and modules visible to
               ;; the test run.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH") ":"
                                      site-packages))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux"))
    (license license:bsd-3)))
3214
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.5.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bioinformatics-centre/kaiju/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0afbfalfw9y39bkwnqjrh9bghs118ws1pzj5h8l0nblgn3mbjdks"))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)))
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; 'make' is run from the "src" sub-directory.
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               ;; Leave "src" again, then copy the contents of both "bin"
               ;; and "util" into the output's bin directory.
               (chdir "..")
               (for-each (lambda (directory)
                           (copy-recursively directory bindir))
                         '("bin" "util")))
             #t)))))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
3252
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32 "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (arguments
     `(#:tests? #f                      ; no test target
       #:python ,python-2))             ; only compatible with Python 2.7
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (home-page "https://github.com/taoliu/MACS/")
    (license license:bsd-3)))
3279
(define-public mafft
  (package
    (name "mafft")
    (version "7.310")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mafft.cbrc.jp/alignment/software/mafft-" version
             "-without-extensions-src.tgz"))
       (file-name (string-append name "-" version ".tgz"))
       (sha256
        (base32 "0gbsaz6z2qa307kd7wfb06c3y4ikmv1hsdvlns11f6zq4w1z9pwc"))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags
       (let ((prefix (assoc-ref %outputs "out")))
         (list (string-append "PREFIX=" prefix)
               (string-append "BINDIR=" prefix "/bin")))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile lives in the "core" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "core")
             #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; The MAFFT authors advised that mafft-profile, mafft-distance
             ;; and mafft-homologs.rb are too "specialised" to be worth
             ;; distributing.
             ;;
             ;; NOTE(review): the mafft-profile pattern below says "2cl"
             ;; while its replacement says "f2cl", and the last "rm -f"
             ;; replacement is ")#" rather than "#".  Both are kept verbatim;
             ;; confirm against the upstream Makefile before changing them.
             (substitute* "Makefile"
               ;; SCRIPTS: drop mafft-homologs.rb.
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; MANPAGES: drop mafft-homologs.
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; PROGS: drop mafft-distance.
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; PROGS: drop mafft-profile.
               (("splittbfast disttbfast tbfast mafft-profile 2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") ")#")
               ;; Keep the man pages out of the libexec folder.
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \\$\\(DESTDIR\\)\\$\\(LIBDIR\\)")
                "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace bare tool names with the file names found in PATH.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (coreutils-bin
                     (string-append (assoc-ref %build-inputs "coreutils")
                                    "/bin:")))
               ;; Prepend the coreutils bin directory to PATH for every
               ;; installed program.
               (for-each (lambda (program)
                           (wrap-program program
                             (list "PATH" ":" 'prefix
                                   (list coreutils-bin))))
                         (find-files bindir)))
             #t)))))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
3365
(define-public mash
  (package
    (name "mash")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marbl/mash/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
       (modules '((guix build utils)))
       (snippet
        ;; Remove the bundled kseq header.
        ;; TODO: Also delete bundled murmurhash and open bloom filter.
        '(delete-file "src/mash/kseq.h"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application.  Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags (list "CC=gcc")
       #:configure-flags
       (let ((capnproto (assoc-ref %build-inputs "capnproto"))
             (gsl (assoc-ref %build-inputs "gsl")))
         (list (string-append "--with-capnp=" capnproto)
               (string-append "--with-gsl=" gsl)))
       #:phases
       (modify-phases %standard-phases
         ;; The source snippet deletes the bundled kseq.h, so include the
         ;; header under its htslib/ name instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'autoconf
           (lambda _
             (zero? (system* "autoconf")))))))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3421
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (patches (search-patches "metabat-fix-compilation.patch"))
       (sha256
        (base32 "1hmvdalz3zj5sqqklg0l4npjdv37cv2hsdi1al9iby2ndxjs1b73"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("scons" ,scons)))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Include the samtools and htslib headers under the names
         ;; "samtools/..." and "htslib/..." rather than "bam/...".
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Point the SCons script at the htslib and samtools inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
             #t))
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (boost (assoc-ref inputs "boost")))
               (mkdir out)
               (zero? (system* "scons"
                               (string-append "PREFIX=" out)
                               (string-append "BOOST_ROOT=" boost)
                               "install")))))
         ;; Check and install are carried out during build phase.
         (delete 'check)
         (delete 'install))))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
3505
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/ctSkennerton/minced/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; The expected test output still mentions release 0.1.6;
             ;; rewrite it so that it matches the version built here.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bindir   (string-append (assoc-ref outputs "out")
                                             "/bin"))
                    (launcher (string-append bindir "/minced"))
                    (shell    (string-append (assoc-ref inputs "bash")
                                             "/bin/sh"))
                    (java     (string-append (assoc-ref inputs "jre")
                                             "/bin/java")))
               ;; Upstream ships a wrapper script that locates itself before
               ;; starting the JAR.  All paths are known at build time, so a
               ;; minimal launcher is generated instead, which avoids a
               ;; dependency on coreutils.
               (install-file "minced.jar" bindir)
               (call-with-output-file launcher
                 (lambda (port)
                   (display
                    (string-append "#!" shell "\n\n"
                                   java " -jar "
                                   bindir "/minced.jar \"$@\"\n")
                    port)))
               ;; Read-only and executable for everyone.
               (chmod launcher #o555)))))))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
3561
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "misopy" version))
              (sha256
               (base32
                "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
              (modules '((guix build utils)))
              ;; The snippet ends with an explicit #t, like the other source
              ;; snippets in this file, instead of relying on the
              ;; unspecified value returned by 'substitute*'.
              (snippet
               '(begin
                  (substitute* "setup.py"
                    ;; Use setuptools, or else the executables are not
                    ;; installed.
                    (("distutils.core") "setuptools")
                    ;; use "gcc" instead of "cc" for compilation
                    (("^defines")
                     "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)))  ;for tests
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
3608
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source (origin
              ;; Fetched with the tarbomb-aware method.
              (method url-fetch/tarbomb)
              (uri (string-append
                    "http://www.drive5.com/muscle/muscle_src_"
                    version ".tar.gz"))
              (sha256
               (base32
                "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so just test if it runs.
           (lambda _
             (let ((status (system* "./muscle" "-version")))
               (zero? status))))
         (replace 'install
           ;; Copy the single executable into the output's bin directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
3642
(define-public newick-utils
  ;; There are no recent releases so we package from git.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87")
        (revision "1"))
    (package
      (name "newick-utils")
      ;; Version string: 1.6-<revision>.<first eight characters of commit>.
      (version (string-append "1.6-" revision "."
                              (string-take commit 8)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/tjunier/newick_utils.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Run autoreconf right after unpacking, before the standard
           ;; phases that follow.
           (add-after 'unpack 'autoconf
             (lambda _
               (zero? (system* "autoreconf" "-vif")))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
3681
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarball attached to the "v<version>" GitHub release.
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
3706
(define-public pplacer
  (let ((commit "g807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/matsen/pplacer/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0z1lnd2s8sh6kpzg106wzbh2szw7h0hvq8syd5a6wv4rmyyz6x0f"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.01
         #:findlib ,ocaml4.01-findlib
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Overwrite the C sources under cdd_src/ with the ones from the
           ;; cddlib source tarball provided as the "cddlib-src" input.
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
                      (local-dir "cddlib_guix"))
                 (mkdir local-dir)
                 (with-directory-excursion local-dir
                   ;; Fail right here if the tarball cannot be extracted,
                   ;; rather than with an obscure error further down.
                   (unless (zero? (system* "tar" "xvf" cddlib-src))
                     (error "failed to unpack cddlib sources" cddlib-src)))
                 ;; Pick the first real entry of LOCAL-DIR, explicitly
                 ;; skipping "." and ".." instead of assuming that they
                 ;; occupy the first two positions of the listing.
                 (let* ((unpacked
                         (scandir local-dir
                                  (lambda (entry)
                                    (not (member entry '("." ".."))))))
                        (cddlib-src-folder
                         (string-append local-dir "/"
                                        (car unpacked)
                                        "/lib-src")))
                   (for-each
                    (lambda (file)
                      (copy-file file
                                 (string-append "cdd_src/" (basename file))))
                    (find-files cddlib-src-folder ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; Installation is a plain copy of the "bin" directory into the
           ;; output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (copy-recursively "bin" bin))
               #t)))))
      (native-inputs
       `(("zlib" ,zlib)
         ("gsl" ,gsl)
         ("ocaml-ounit" ,ocaml4.01-ounit)
         ("ocaml-batteries" ,ocaml4.01-batteries)
         ("ocaml-camlzip" ,ocaml4.01-camlzip)
         ("ocaml-csv" ,ocaml4.01-csv)
         ("ocaml-sqlite3" ,ocaml4.01-sqlite3)
         ("ocaml-xmlm" ,ocaml4.01-xmlm)
         ("ocaml-mcl" ,ocaml4.01-mcl)
         ("ocaml-gsl" ,ocaml4.01-gsl)
         ("cddlib-src" ,(package-source cddlib))))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "http://matsen.fhcrc.org/pplacer")
      (license license:gpl3))))
3789
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The Python scripts live in the "scripts" sub-directory of the
         ;; pplacer source tree.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _
             (chdir "scripts")
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "python" "-m" "unittest" "discover" "-v"))))
         ;; Prefix PATH for the two scripts wrapped below with the "bin"
         ;; directories of the hmmer and infernal inputs.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; refpkg_align.py gets both hmmer and infernal on PATH.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin:"
                            (assoc-ref inputs "infernal") "/bin")))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               ;; hrefpkg_query.py only gets hmmer.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin")))
                 (wrap-program (string-append bin "/hrefpkg_query.py")
                   `("PATH" ":" prefix (,path)))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
3829
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
3857
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       ;; The source is distributed as a zip archive, hence the "unzip"
       ;; native input below.
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    ;; requires Python 2.7
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
3892
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      ;; Version string: 2.2.3.<first seven characters of the commit>.
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; remove bundled Cython sources
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; The build is run from this sub-directory of the checkout.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; With setuptools version 18.0 and later this setup.py hack
           ;; causes a build error, so we disable it.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
3949
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://wasabiapp.org/download/prank/prank.source."
                    version ".tgz"))
              (sha256
               (base32
                "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Phase order after 'unpack: first 'remove-m64-flag (run from the
         ;; top of the source tree, hence the "src/Makefile" path), then
         ;; 'enter-src-dir.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (add-after 'remove-m64-flag 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (mandir (string-append out "/share/man/man1"))
                    ;; Directories prepended to PATH by the wrapper.
                    (tools  (string-append
                             (assoc-ref %build-inputs "mafft") "/bin:"
                             (assoc-ref %build-inputs "exonerate") "/bin:"
                             (assoc-ref %build-inputs "bppsuite") "/bin")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,tools)))
               (install-file "prank.1" mandir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
4007
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "5.16b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
             version "_src.tar.gz"))
       (sha256
        (base32
         "1wl0dawpssqwfjvr651r4wlww8hhjin8nba6xh71ks7sbypx886j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; There is no configure script, so we modify the Makefile
           ;; directly.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; The install directory is not created during 'make install'.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir))
             #t))
         (add-after 'install 'wrap-programs
           ;; Prefix PATH in the wrapper for proteinortho5.pl with the value
           ;; PATH has in the build environment.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((search-path (getenv "PATH"))
                   (script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl")))
               (wrap-program script
                 `("PATH" ":" prefix (,search-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
4059
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the text below must match the number of @item
    ;; entries in the enumeration (seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
4097
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/hyattpd/Prodigal/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; Pass the output's bin directory to make as INSTALLDIR.
       #:make-flags
       (let ((bindir (string-append (assoc-ref %outputs "out") "/bin")))
         (list (string-append "INSTALLDIR=" bindir)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
4128
(define-public roary
  (package
    (name "roary")
    (version "3.11.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "10lw78x1xzvn7xzvnmh4bm3cak3ah5cssapl0yidvhaj1f44h29i"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The source tree's "bin" and "lib" directories are
             ;; put in front of PATH and PERL5LIB for the test run.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Every test file is run, even after a failure; the phase
             ;; succeeds only when the list of failing files is empty.
             (null? (filter (lambda (file)
                              (display file)(display "\n")
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every installed program whose name does not end in
               ;; "R".  The names come from the source tree's "bin"
               ;; directory and therefore already carry the "bin/" prefix.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script is wrapped separately, with
               ;; R_LIBS_SITE and a PATH holding coreutils and our own
               ;; "bin" directory.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
4244
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.10")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Build with Makefile.HYBRID.gcc rather than the SSE or AVX
       ;; Makefiles.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Provide a "raxml" alias next to the real executable.  The
               ;; link has to be created inside BIN: a bare "raxml" would be
               ;; created in the build directory and never reach the
               ;; output.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
4286
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
             version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of boost
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           ;; Install every file whose name matches "rsem-.*" into bin, and
           ;; the Perl helper module into the site_perl directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (bindir   (string-append out "/bin/"))
                    (perl-lib (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perl-lib)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perl-lib))
             #t))
         (add-after 'install 'wrap-program
           ;; Prefix PERL5LIB with our site_perl directory for the
           ;; programs listed below.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (perl-lib (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perl-lib))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
4362
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/rseqc/"
                       "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       ;; The snippet ends with an explicit #t, like the other source
       ;; snippets in this file, instead of relying on the unspecified
       ;; value returned by 'substitute*'.
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; remove dependency on outdated "distribute" module
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; do not use bundled copy of pysam
             (("^have_pysam = False") "have_pysam = True"))
           #t))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
4404
(define-public seek
  ;; There are no release tarballs.  According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source (origin
                (method hg-fetch)
                (uri (hg-reference
                      (url "https://bitbucket.org/libsleipnir/sleipnir")
                      (changeset changeset)))
                (sha256
                 (base32
                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; DIRS lists the sub-directories of "tools/" that are built and
         ;; installed in addition to the top-level targets.
         (let ((dirs '("SeekMiner"
                       "SeekEvaluator"
                       "SeekPrep"
                       "Distancer"
                       "Data2DB"
                       "PCL2Bin")))
           (modify-phases %standard-phases
             (add-before 'configure 'bootstrap
               (lambda _
                 (zero? (system* "bash" "gen_auto"))))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 ;; 'every' stops at the first tool that fails to build.
                 (every (lambda (dir)
                          (with-directory-excursion (string-append "tools/" dir)
                            (zero? (apply system* "make" make-flags))))
                        dirs)))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 ;; Same short-circuiting iteration as in
                 ;; 'build-additional-tools': do not enter any further
                 ;; directory once one "make install" has failed.
                 (every (lambda (dir)
                          (with-directory-excursion (string-append "tools/" dir)
                            (zero? (apply system* "make"
                                          (append make-flags
                                                  '("install"))))))
                        dirs)))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
4473
;; Samtools 1.x, built against the system htslib.  Besides the regular
;; "make install", the static library libbam.a and the top-level C headers
;; are installed.
(define-public samtools
  (package
    (name "samtools")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1xidmv0jmfy7l0kb32hdnlshcxgzi1hmygvig0cqrq1fhckdlhl5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags
       (list "--with-ncurses" "--with-htslib=system")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test script calls out to /bin/bash
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install the static library into "lib".
             (install-file "libbam.a"
                           (string-append (assoc-ref outputs "out") "/lib"))
             #t))
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every "*.h" file of the top-level directory into
             ;; "include/samtools/".
             (let* ((out     (assoc-ref outputs "out"))
                    (target  (string-append out "/include/samtools/"))
                    (header? (lambda (entry)
                               (string-match "\\.h$" entry))))
               (for-each (lambda (header)
                           (install-file header target))
                         (scandir "." header?))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
4531
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools.  The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Reuse the arguments of 'samtools', adjusting the make flags and
       ;; the phases.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:make-flags inherited-flags)
            `(cons "LIBCURSES=-lncurses" ,inherited-flags))
           ((#:phases inherited-phases)
            `(modify-phases ,inherited-phases
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   ;; Only the "samtools" executable is installed here.
                   (let* ((out    (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t)))
               (delete 'patch-tests)
               (delete 'configure))))))))
4562
;; MOSAIK read mapper, built from a fixed Git commit.
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; There are no release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, just enter "src".
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy the contents of "../bin" to the output's "bin" directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (target (string-append out "/bin")))
                 (mkdir-p target)
                 (copy-recursively "../bin" target)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
4609
;; NGS SDK, built from the "ngs-sdk" sub-directory of the NGS sources with
;; the project's own Perl-based configure script.
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.3.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.  'getenv'
               ;; returns #f when PERL5LIB is unset, which 'string-append'
               ;; cannot handle, so only append an existing value.
               (setenv "PERL5LIB"
                       (let ((perl5lib (getenv "PERL5LIB")))
                         (if perl5lib
                             (string-append ".:" perl5lib)
                             ".")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system* "./configure"
                               (string-append "--build-prefix=" (getcwd) "/build")
                               (string-append "--prefix=" out))))))
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t)))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
4657
;; Java bindings of NGS: same recipe as 'ngs-sdk', but built from the
;; "ngs-java" sub-directory.
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Prepend a #:modules list to the arguments of 'ngs-sdk' and rewrite
     ;; the inherited phases.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases sdk-phases)
        `(modify-phases ,sdk-phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
4675
;; NCBI-VDB library.  In addition to the regular installation, the interface
;; libraries ("ilib"), the interface headers and two configuration files are
;; installed because sra-tools needs them.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.  'getenv' returns
             ;; #f when PERL5LIB is unset, so only append an existing value.
             (let ((setup    (string-append (getcwd) "/setup"))
                   (perl5lib (getenv "PERL5LIB")))
               (setenv "PERL5LIB"
                       (if perl5lib
                           (string-append setup ":" perl5lib)
                           setup)))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install interface libraries.  On i686 the interface libraries
             ;; are installed to "linux/gcc/i386", so we need to use the Linux
             ;; architecture name ("i386") instead of the target system prefix
             ;; ("i686").
             (mkdir (string-append (assoc-ref outputs "out") "/ilib"))
             (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                              ,(system->linux-architecture
                                                (or (%current-target-system)
                                                    (%current-system)))
                                              "/rel/ilib")
                               (string-append (assoc-ref outputs "out")
                                              "/ilib"))
             ;; Install interface headers
             (copy-recursively "interfaces"
                               (string-append (assoc-ref outputs "out")
                                              "/include"))
             #t))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir target)
               (install-file "libs/kfg/default.kfg" target)
               (install-file "libs/kfg/certs.kfg" target))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
4784
;; PLINK 1.07, built with plain "make" (there is no configure script) and
;; linked against LAPACK.
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the single "plink" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
4837
;; PLINK 1.9, built from the "1.9" sub-directory using "Makefile.std".
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/chrchang/plink-ng/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "09ixrds009aczjswxr2alcb774mksq5g0v78dgjjn1h4dky0kf9a"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "BLASFLAGS=-llapack -lopenblas"
             "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
             "ZLIB=-lz"
             "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         ;; The sources to build live in the "1.9" directory.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (delete 'configure)            ; no "configure" script
         ;; Install the single "plink" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
4874
;; Smithlab C++ helper library.  There is no install target, so the object
;; files and headers are copied to the output by hand.
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; The samtools package installs its headers under
               ;; "include/samtools/".  The dot is escaped so that only the
               ;; literal file name "sam.h" is rewritten, not arbitrary
               ;; text such as "sam_h".
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (lib     (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 ;; Object files go to "lib", headers to
                 ;; "include/smithlab-cpp".
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
4927
;; preseq, built against the packaged samtools 0.1 and smithlab-cpp instead
;; of the bundled copies.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/"
                                  "preseq/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled samtools.
                  (delete-file-recursively "samtools")
                  ;; Make the snippet's success explicit.
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       (list (string-append "PREFIX="
                            (assoc-ref %outputs "out"))
             (string-append "LIBBAM="
                            (assoc-ref %build-inputs "samtools")
                            "/lib/libbam.a")
             (string-append "SMITHLAB_CPP="
                            (assoc-ref %build-inputs "smithlab-cpp")
                            "/lib")
             "PROGS=preseq"
             "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)")))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
4976
;; Screed; the test suite is run with nose, skipping tests that carry the
;; "known_failing" attribute.
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Append the current directory to the module search path
             ;; before invoking the test runner.
             (let ((search-path (string-append (getenv "PYTHONPATH") ":.")))
               (setenv "PYTHONPATH" search-path)
               (zero? (system* "nosetests"
                               "--attr" "!known_failing"))))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
5007
;; Python 2 variant, derived from 'python-screed'.
(define-public python2-screed (package-with-python2 python-screed))
5010
;; SRA Toolkit, built against the installed 'ncbi-vdb' package rather than
;; an ncbi-vdb build tree.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.8.2-1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       (list (string-append "DEFAULT_CRT="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/certs.kfg")
             (string-append "DEFAULT_KFG="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/default.kfg")
             ;; The library sub-directory is chosen on the host side from
             ;; the system the package is built for.
             (string-append "VDB_LIBDIR="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            ,(if (string-prefix? "x86_64"
                                                 (or (%current-target-system)
                                                     (%current-system)))
                                 "/lib64"
                                 "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.  'getenv' returns
             ;; #f when PERL5LIB is unset, so only append an existing value.
             (let ((setup    (string-append (getcwd) "/setup"))
                   (perl5lib (getenv "PERL5LIB")))
               (setenv "PERL5LIB"
                       (if perl5lib
                           (string-append setup ":" perl5lib)
                           setup)))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources and
             ;; raw build output of ncbi-vdb, including files that are not
             ;; installed.  Since we are building against an installed version of
             ;; ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (zero? (system*
                     "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     ;; (string-append "--with-xml2-prefix="
                     ;;                (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5"))))))
         ;; This version of sra-tools fails to build with glibc because of a
         ;; naming conflict.  glibc-2.25/include/bits/mathcalls.h already
         ;; contains a definition of "canonicalize", so we rename it.
         ;;
         ;; See upstream bug report:
         ;; https://github.com/ncbi/sra-tools/issues/67
         (add-after 'unpack 'patch-away-glibc-conflict
           (lambda _
             (substitute* "tools/bam-loader/bam.c"
               (("canonicalize\\(" line)
                (string-append "sra_tools_" line)))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
5121
;; SeqAn library: nothing is compiled; the tarball is unpacked and its
;; "include" and "share" directories are copied to the outputs.
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar  (assoc-ref %build-inputs "tar"))
               (bzip (assoc-ref %build-inputs "bzip2"))
               (out  (assoc-ref %outputs "out"))
               (doc  (assoc-ref %outputs "doc")))
           (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
           ;; Do not silently continue when unpacking fails.
           (unless (zero? (system* "tar" "xvf"
                                   (assoc-ref %build-inputs "source")))
             (error "failed to unpack the seqan-library tarball"))
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", everything under "share" to "doc".
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
5164
;; Seqmagick, a Python 2 only command-line front-end to BioPython's file
;; format conversion.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       ;; Use the 'pypi-uri' helper instead of spelling out the PyPI
       ;; download URL by hand.
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32
         "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
    (build-system python-build-system)
    (arguments
     ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Current test in setup.py does not work as of 0.6.1,
         ;; so use nose to run tests instead for now.  See
         ;; https://github.com/fhcrc/seqmagick/issues/55
         (replace 'check (lambda _ (zero? (system* "nosetests")))))))
    (inputs
     ;; biopython-1.66 is required due to
     ;; https://github.com/fhcrc/seqmagick/issues/59
     ;; When that issue is resolved the 'python2-biopython-1.66' package
     ;; should be removed.
     `(("python-biopython" ,python2-biopython-1.66)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
5205
;; Seqtk: a single executable built with plain "make" and installed by hand.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/seqtk/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove extraneous header files, as is done in the seqtk
                  ;; master branch.
                  (for-each delete-file
                            (list "ksort.h" "kstring.h" "kvec.h"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.
           (lambda _ (zero? (system* "./seqtk" "seq"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               ;; 'install-file' returns an unspecified value; report
               ;; success explicitly.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
5248
;; SNAP short-read aligner: built with plain "make"; the two resulting
;; executables are installed by hand.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check (lambda _ (zero? (system* "./unit_tests"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library the program is linked against, not a build-time
    ;; tool, so it is a regular input rather than a native input.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
5289
;; sortmerna: two outputs.  The executables go to "out"; the FASTA files
;; found under "rRNA_databases" go to the separate "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    (outputs '("out"      ;for binaries
               "db"))     ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out")
                                           "/bin"))
                    (dbdir  (string-append
                             (assoc-ref outputs "db")
                             "/share/sortmerna/rRNA_databases")))
               ;; Executables first, then every database file.
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               (for-each (lambda (fasta)
                           (install-file fasta dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
5336
;; star: the source snippet strips the pre-built binaries and the bundled
;; htslib; the build then runs inside "source/" against the htslib input.
(define-public star
  (package
    (name "star")
    (version "2.5.3a")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "013wirlz8lllgjyagl48l75n1isxyabqb3sj7qlsl0x1rmvqw99a"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Use whatever "rm" is on PATH rather than a fixed
                  ;; "/bin/rm".
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite from the Depend.list rule.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled copy into system
             ;; includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot before "h" is escaped so that only real ".h" header
             ;; names match, like in the pattern above.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
5410
;; subread: built from "src/" with Makefile.Linux; the executables end up in
;; "../bin" relative to that directory and are copied to the output.
(define-public subread
  (package
    (name "subread")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0gn5zhbvllks0mmdg3qlmsbg91p2mpdc2wixwfqpi85yzfrh8hcy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               ;; Build the "CCFLAGS=" assignment once; only the flag list
               ;; depends on the target system.
               (string-append "CCFLAGS="
                              (string-join
                               (if (string-prefix? "x86_64" system)
                                   (append flags flags64)
                                   flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin)
               ;; Return #t explicitly instead of relying on the
               ;; unspecified value of copy-recursively.
               #t)))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
5464
;; stringtie: the bundled "samtools-0.1.18" directory is removed from the
;; source and the build is redirected to the samtools-0.1 input.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32
         "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; The "stringtie" target must no longer depend on the
             ;; bundled libbam.a.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Look the samtools headers up under <samtools/...>.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ stem)
                (string-append "#include <samtools/" stem ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (install-file "stringtie" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
5526
;; taxtastic: built with python-2; the test suite is run through unittest
;; discovery instead of the default 'check phase.
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.6.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "taxtastic" version))
       (sha256
        (base32
         "0s79z8kfl853x7l4h8ms05k31q87aw62nrchlk20w9n227j35929"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             (zero? (system* "python" "-m" "unittest" "discover" "-v")))))))
    ;; The labels say "python-", the packages are the python2- variants.
    (propagated-inputs
     `(("python-sqlalchemy" ,python2-sqlalchemy)
       ("python-decorator" ,python2-decorator)
       ("python-biopython" ,python2-biopython)
       ("python-pandas" ,python2-pandas)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
5557
;; vcftools: standard GNU build; the make flags point PREFIX and MANDIR at
;; the "out" output and replace the default CFLAGS.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.15")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32
         "1qw30c45wihgy632rbz4rh3njnwj4msj46l1rsgdhyg6bgypmr1i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
5593
;; infernal: plain gnu-build-system package with no custom arguments; perl is
;; only needed to run the tests.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32
         "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
5621
;; r-centipede: the tarball comes from R-Forge rather than CRAN or
;; Bioconductor, hence the hand-written URI.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32
         "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
5645
;; r-vegan: CRAN package; gfortran is a build-time input and the R
;; dependencies are propagated.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.4-4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "vegan" version))
              (sha256
               (base32
                "1n57dzv2aid6iqd9fkqik401sidqanhzsawyak94qbiyh6dbd1x9"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ("r-lattice" ,r-lattice)
       ("r-mass" ,r-mass)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (license license:gpl2+)))
5674
;; r-annotate: Bioconductor package "annotate"; every dependency is
;; propagated.
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "annotate" version))
              (sha256
               (base32
                "0wlrp3v2jxw9is98ap39dfi7z97kmw1wv1xi4h7yfh12zpj2r8l0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (home-page "https://bioconductor.org/packages/annotate")
    (synopsis "Annotation for microarrays")
    (description "This package provides R environments for the annotation of
microarrays.")
    (license license:artistic2.0)))
5701
;; r-geneplotter: Bioconductor package "geneplotter"; every dependency is
;; propagated.
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "geneplotter" version))
              (sha256
               (base32
                "1z3g7frc1iviwrsv2dlm4nqvkc0685h4va0388yfxn102ln8wwma"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://bioconductor.org/packages/geneplotter")
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (license license:artistic2.0)))
5726
;; r-genefilter: Bioconductor package "genefilter"; gfortran is a build-time
;; input and the R dependencies are propagated.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.60.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "173swlg6gj4kdllbqvyiw5dggbcxiwlwpqmllsv4dxzn7h25i3g7"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-survival" ,r-survival)))
    (home-page "https://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
5753
;; r-deseq2: Bioconductor package; the upstream name "DESeq2" differs in case
;; from the Guix name and is recorded in the properties.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq2" version))
              (sha256
               (base32
                "1hcxnkkjfvz4hj8iqidshwsjq7jnl1z7wj63dvcwlx1zx5aichyh"))))
    (properties '((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
5790
;; r-dexseq: Bioconductor package; the upstream name "DEXSeq" is recorded in
;; the properties and every dependency is propagated.
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DEXSeq" version))
       (sha256
        (base32
         "0qxwnz2ffhav9slcn095k206cfza9i3i5l7w1154plf08gpy1d1d"))))
    (properties `((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allow the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (license license:gpl3+)))
5833
;; r-annotationforge: Bioconductor package; the upstream name
;; "AnnotationForge" is recorded in the properties.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "01vbrf76vqfvxh6vpfxkjwccxggnha3byqzj333glqz2b6kwx5q1"))))
    (properties '((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
5863
;; r-rbgl: Bioconductor package "RBGL"; its single dependency, r-graph, is
;; propagated.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "18jad23i3899ypv4bg3l47cvvs3qnj1pqis2p9x0135yv5y6wnv7"))))
    (properties '((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (home-page "https://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
5884
;; r-gseabase: Bioconductor package; the upstream name "GSEABase" is recorded
;; in the properties.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "0kpkl6c5lrar6ip7wlhvd5axqlb9lb5l3lgbdb3dlih32c3nz0yq"))))
    (properties '((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
5911
;; r-category: Bioconductor package; the upstream name "Category" is recorded
;; in the properties.
(define-public r-category
  (package
    (name "r-category")
    (version "2.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "0mkav04vbla0xfa0dssxdd0rjs589sxi83xklf5iq5hj3dm8y0i8"))))
    (properties '((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (home-page "https://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
5942
;; r-gostats: Bioconductor package; the upstream name "GOstats" is recorded
;; in the properties.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "04gqfdlx9fxf97qf0l28x4aaqvl10n6v58qiz5fiaw05sbj1pf1i"))))
    (properties '((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (home-page "https://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
5973
;; r-shortread: Bioconductor package "ShortRead"; zlib is a regular input and
;; the R dependencies are propagated.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "06mknlsmd4hnaxzdjapgvp2kgdnf9w103y500dsac5jgsz4vwzcz"))))
    (properties '((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
6015
;; r-systempiper: Bioconductor package; the upstream name "systemPipeR" is
;; recorded in the properties.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "11mj8pjq5vj25768vmagpzv74fvi3p3kdk5zdlznqyiaggri04cv"))))
    (properties '((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
6061
;; r-grohmm: Bioconductor package; the upstream name "groHMM" is recorded in
;; the properties.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "0cjkj0ypyc4dfi9s8dh88kh6q4xlpnc0wal7njg4b4gqj0l2hva7"))))
    (properties '((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (license license:gpl3+)))
6088
;; r-txdb-hsapiens-ucsc-hg19-knowngene: Bioconductor annotation data package,
;; fetched from the "data/annotation" area and marked as not substitutable.
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/annotation/src/contrib"
                           "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     '((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (license license:artistic2.0)))
6120
;; r-sparql: CRAN package; the upstream name "SPARQL" is recorded in the
;; properties and both R dependencies are propagated.
(define-public r-sparql
  (package
    (name "r-sparql")
    (version "1.16")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "SPARQL" version))
              (sha256
               (base32
                "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
    (properties `((upstream-name . "SPARQL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    ;; Use the HTTPS form of the CRAN package URL, as r-vegan does.
    (home-page "https://cran.r-project.org/web/packages/SPARQL")
    (synopsis "SPARQL client for R")
    (description "This package provides an interface to use SPARQL to pose
SELECT or UPDATE queries to an end-point.")
    ;; The only license indication is found in the DESCRIPTION file,
    ;; which states GPL-3. So we cannot assume GPLv3+.
    (license license:gpl3)))
6143
;; VSEARCH, built from the GitHub release tarball against the system
;; cityhash instead of the bundled copy.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/torognes/vsearch/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0rplgpvsdkxw0k371ckxrp6i77jn93ckhslaazwbyd85m83nkynr"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Drop the bundled cityhash sources; the patch above adjusts
           ;; the vsearch sources accordingly.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Run autoreconf right after unpacking; the phase succeeds only
         ;; when autoreconf exits with status 0.
         (add-after 'unpack 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
6194
;; ParDRe: built with plain make (no configure step, no test suite); the
;; single "ParDRe" binary is installed by hand.
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       ;; The upstream file name carries the real version, not the "-1"
       ;; suffixed one, hence the literal.
       (uri "mirror://sourceforge/pardre/ParDRe-rel1.1.5.tar.gz")
       (sha256
        (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (arguments
     '(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the executable into the "bin" directory of the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (install-file "ParDRe" bindir)
               #t))))))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
6235
;; Ruby gem "bio-kseq": bindings for the kseq.h FASTA/FASTQ parser.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments '(#:test-target "spec"))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
6263
;; Ruby gem "bio-locus": tabix-like lookup of genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
6287
;; Ruby gem "bio-blastxmlparser": BLAST XML parser usable as a library and
;; as a command line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is a test framework: it is a build-time dependency only, so it
    ;; is declared as a native input like in the other Ruby packages here.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
6311
;; BioRuby, packaged from the "bio" gem.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests hard-code /bin/sh, /bin/ls, /bin/echo and a bare
         ;; "which"; replace each with the file name that the `which'
         ;; procedure finds on PATH at build time.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; Clauses are applied in order to every line, which is
             ;; equivalent to running one substitute* per pattern.
             (substitute* '("test/functional/bio/test_command.rb")
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; Plain quoted list of two files: elements are separated by
             ;; whitespace only, a comma here would be read as `unquote'.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
6354
;; ACSNMineR from CRAN: gene set enrichment based on ACSN maps.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "ACSNMineR" version))
              (sha256
               (base32
                "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    ;; The CRAN name uses mixed case, unlike the Guix package name.
    (properties `((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    ;; Link to the HTTPS version of the CRAN landing page.
    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (license license:gpl2+)))
6380
;; BiocGenerics from Bioconductor; it declares no R dependencies here.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32 "03wxvhxyrhipbgcg83lqlfn7p9gbzzrnl48y0dq7303xgp232zai"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BiocGenerics")))
    (home-page "https://bioconductor.org/packages/BiocGenerics")
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (license license:artistic2.0)))
6400
;; BiocInstaller from Bioconductor.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32 "19fga27bv6q9v5mpil74y76lahmnwvpg2h33rdx1r79nvljkd19d"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BiocInstaller")))
    (home-page "https://bioconductor.org/packages/BiocInstaller")
    (synopsis "Install Bioconductor packages")
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (license license:artistic2.0)))
6419
;; biocViews from Bioconductor.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32 "09zyqj1kqc089lmh9sliy0acanx9zimcasvp71dsrg2bqm08r1md"))))
    (build-system r-build-system)
    (properties '((upstream-name . "biocViews")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (home-page "https://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
6446
;; bookdown from CRAN; the upstream name equals the name used in cran-uri,
;; so no upstream-name property is set.
(define-public r-bookdown
  (package
    (name "r-bookdown")
    (version "0.5")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "bookdown" version))
       (sha256
        (base32 "0zm63kr4f4kja4qpwkzl119zzyciqj7ihajfqgfjpgb4dzaiycxp"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-htmltools" ,r-htmltools)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (home-page "https://github.com/rstudio/bookdown")
    (synopsis "Authoring books and technical documents with R markdown")
    (description "This package provides output formats and utilities for
authoring books and technical documents with R Markdown.")
    (license license:gpl3)))
6468
;; BiocStyle from Bioconductor.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32 "05f2j9fx8s5gh4f8qkl6wcz32ghz04wxhqb3xxcn1bj24qd7x1x8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BiocStyle")))
    (propagated-inputs
     `(("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
6493
;; BiocCheck from Bioconductor, with its helper scripts linked into "bin".
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocCheck" version))
              (sha256
               (base32
                "1nzp8kgw13z9pgf885rplj6k37jcldfhbz0adqclxr2gq0yalmyx"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out        (assoc-ref outputs "out"))
                    (dest-dir   (string-append out "/bin"))
                    ;; No trailing slash: the separator is added when the
                    ;; file names are joined below.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script from the installed R library into "bin".
               (for-each (lambda (script)
                           (symlink (string-append script-dir "/" script)
                                    (string-append dest-dir "/" script)))
                         '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
6539
;; getopt from CRAN.
(define-public r-getopt
  (package
    (name "r-getopt")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "getopt" version))
              (sha256
               (base32
                "00f57vgnzmg7cz80rjmjz1556xqcmx8nhrlbbhaq4w7gl2ibl87r"))))
    (build-system r-build-system)
    (synopsis "Command-line option processor for R")
    (description
     "This package is designed to be used with Rscript to write shebang
scripts that accept short and long options. Many users will prefer to
use the packages @code{optparse} or @code{argparse} which add extra
features like automatically generated help options and usage texts,
support for default values, positional argument support, etc.")
    (home-page "https://github.com/trevorld/getopt")
    (license license:gpl2+)))
6561
;; optparse from CRAN; depends on r-getopt at run time.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.4.4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "optparse" version))
              (sha256
               (base32
                "1ff4wmsszrb3spwfp7ynfs8w11qpy1sdzfxm1wk8dqqvdwris7qb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-getopt" ,r-getopt)))
    (synopsis "Command line option parser")
    (description
     "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (home-page "https://github.com/trevorld/optparse")
    (license license:gpl2+)))
6584
;; DNAcopy from Bioconductor.
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DNAcopy" version))
              (sha256
               (base32
                "127il5rlg1hzjlhwhs64x3nm18p00q1pd9ckb2b9ifl0rax95wai"))))
    (properties
     `((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    ;; The Fortran compiler is a build-time tool that runs on the build
    ;; machine, so it is a native input rather than a regular input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
6606
;; S4Vectors from Bioconductor.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "03s8vz33nl6mivjb7dbvj702dkypi340lji1sjban03fyyls0hw0"))))
    (build-system r-build-system)
    (properties '((upstream-name . "S4Vectors")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
6633
;; seqinr from CRAN; zlib is a regular (non-propagated) input.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.4-5")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqinr" version))
              (sha256
               (base32
                "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (synopsis "Biological sequences retrieval and analysis")
    (description
     "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (home-page "http://seqinr.r-forge.r-project.org/")
    (license license:gpl2+)))
6658
;; IRanges from Bioconductor.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "1vqczb9wlxsmpwpqig6j1dmiblcfpq6mgnq8qwzcrvddm4cp47m5"))))
    (build-system r-build-system)
    (properties '((upstream-name . "IRanges")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
6687
;; GenomeInfoDbData: Bioconductor annotation data used by GenomeInfoDb.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "0.99.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", which rules out bioconductor-uri; build the URL by hand.
       (uri (string-append
             "https://bioconductor.org/packages/release/data/annotation"
             "/src/contrib/GenomeInfoDbData_"
             version ".tar.gz"))
       (sha256
        (base32 "120qvhb0pvkzd65lsgja62vyrgc37si6fh68q4cg4w5x9f04jw25"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomeInfoDbData")))
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
6710
;; GenomeInfoDb from Bioconductor.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "1jhm0imkac4gvchbjxj408aakk39xdv2fyh818d3lk295bz6bnyp"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomeInfoDb")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
6738
;; edgeR from Bioconductor.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.20.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "01qnxwr9rmz8r5ga3hvjk632365ga2aygx71mxkk7jiad2pjznsp"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
6766
;; VariantAnnotation from Bioconductor.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "1lllp2vgyfbrar1yg28ji7am470hfzrzxm1bgdk68xpnrwcgcl25"))))
    (build-system r-build-system)
    (properties '((upstream-name . "VariantAnnotation")))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
6804
;; limma from Bioconductor; the upstream name equals the name passed to
;; bioconductor-uri, so no upstream-name property is set.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "0a15gsaky0hfrkx8wrrmp0labzxpq6m2hrd33zl206wyas8bqzcs"))))
    (build-system r-build-system)
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (home-page "http://bioinf.wehi.edu.au/limma")
    (license license:gpl2+)))
6823
;; XVector from Bioconductor, built against the system zlib.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "1i4i3kdxr78lr1kcxq657p11ybi7kq10c8kyaqyh6gfc8i9rhvmk"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the references to the zlibbioc R package from the package
         ;; metadata; zlib itself is provided through `inputs' below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
6859
;; GenomicRanges from Bioconductor.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "10ra2sjn17h6gilm9iz0cygp9ijpgbirljlc4drwrnivnw9cmi2a"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, a capability that plays a central role
when it comes to analyzing high-throughput sequencing data (a.k.a. NGS data).
The GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
6888
;; Biobase from Bioconductor.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "1cgm1ja1kp56zdlzyy9ggbkfn8r2vbsd4hncmz8g4hjd47fg18kg"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Biobase")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
6910
;; AnnotationDbi from Bioconductor.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "1dh4qs1a757n640gs34lf6z2glc96nan86x0sqaw5csadl2rhnlc"))))
    (build-system r-build-system)
    (properties '((upstream-name . "AnnotationDbi")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
6937
;; biomaRt from Bioconductor.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "1dn3ysf0vb3mmg2b3380g0j1ajf88x4rh7fddfp990h2xlnsy2cx"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-progress" ,r-progress)
       ("r-rcurl" ,r-rcurl)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
6969
;; BiocParallel from Bioconductor.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "13ng3n2wsgl3fh0v6jnz3vg51k5c1sh44pqdvblcrcd1qyjmmqhd"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BiocParallel")))
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    ;; The license field lists two licenses.
    (license (list license:gpl2+ license:gpl3+))))
6994
;; Biostrings, fetched from Bioconductor: string containers and matching
;; algorithms for biological sequences.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "0vg50qdlxqcm2d6axjnzg8wh8pr4c5gz03l8bdl0llmwzp0zclzk"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
7020
;; Rsamtools, fetched from Bioconductor: R interface to the samtools,
;; bcftools and tabix utilities.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "0pjny5fjvbnfdyhl3bwxin678sha2drvs00sivxh3l772cn6yams"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the zlibbioc entry from DESCRIPTION and its import from
         ;; NAMESPACE; zlib is supplied through 'inputs' below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             ;; Phases signal success by returning #t.
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    ;; Use the same short landing-page URL form as the other Bioconductor
    ;; packages in this file.
    (home-page "https://bioconductor.org/packages/Rsamtools")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
7064
;; DelayedArray, fetched from Bioconductor: delayed operations on
;; array-like objects.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "0s7h2giyvz04cg6248kbbzpwhxdrpnsvl2s8k5c8ricisd9aaz4b"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
7094
;; SummarizedExperiment, fetched from Bioconductor: container for assays
;; indexed by genomic range and sample.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "1011r8l0k8420j31bmh4xdcp6ka5bzf4bqhip84v5b6alpkcbvmf"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
7125
;; GenomicAlignments, fetched from Bioconductor: containers for short
;; genomic alignments.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "0sw30lj11wv7ifzypqm04lcah987crqwvj48wz3flaw3biw41zfi"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
7158
;; rtracklayer, fetched from Bioconductor: R interface to genome browsers
;; and annotation track formats.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "12al1ygzy9p4myxa1fd817m28x2fj6f863znk9bw3hp7knbi98dh"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the zlibbioc entry from DESCRIPTION and its import from
         ;; NAMESPACE; zlib is supplied through 'inputs' below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             ;; Phases signal success by returning #t.
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
7203
;; GenomicFeatures, fetched from Bioconductor: tools for transcript
;; centric annotations.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "1khjvq1ffhqavkwf8n7bilknci60lxbg52icrcf2vnb9k8rlpghs"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rmysql" ,r-rmysql)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
7245
;; GO.db: Gene Ontology annotation maps.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The URL is assembled by hand; it points below
       ;; "data/annotation/" on the Bioconductor site.
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "GO.db_" version ".tar.gz"))
       (sha256
        (base32 "02cj8kqi5w39jwcs8gp1dgj08sah262ppxnkz4h3qd0w191y8yyl"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
7269
;; graph, fetched from Bioconductor: graph data structures for R.
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "graph" version))
       (sha256
        (base32 "15aajjp8h2z14p80c8hyd4rrmr9vqsm7bvwb989jxjl4k6g52an1"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description
     "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
7288
;; topGO, fetched from Bioconductor: enrichment analysis for Gene Ontology
;; terms.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "1hqffz5qp7glxdvjp37005g8qk5nam3f9wpf6d1wjnzpar04f3dz"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Upstream allows any version of the LGPL.
    (license license:lgpl2.1+)))
7321
;; BSgenome, fetched from Bioconductor: shared infrastructure for
;; Biostrings-based genome data packages.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "1jbzq7lm2iajajn2bifxnkss0k9fdvgqr30mral17cbhp5f6w4lq"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
7351
;; BSgenome.Hsapiens.1000genomes.hs37d5: Homo sapiens genome sequences
;; (hs37d5).
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.1000genomes.hs37d5_"
                           version ".tar.gz"))
       (sha256
        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    ;; Upstream spelling of the package name.
    (properties
     '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (license license:artistic2.0)))
7382
;; impute, fetched from Bioconductor: imputation of missing microarray
;; data.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "0b8r4swvyx3cjcc2ky8yn0ncpzlbi1pgfsn3wpbjmhh7sqrffm2n"))))
    (build-system r-build-system)
    ;; A Fortran compiler is made available to the build.
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
7402
;; seqPattern, fetched from Bioconductor: visualisation of oligonucleotide
;; patterns and motif occurrences.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "1kcm5w83q7w0v0vs7nyp4gq5z86c6n6pqy9zmyyhxcrns7f597pm"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
7429
;; genomation, fetched from Bioconductor: summary, annotation and
;; visualization of genomic intervals.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1ddd8c9w1f1i1ga9rpbwiic8rsaws1chdxx4j38bpyaiy4zhz1ca"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    ;; The opening sentence used to read "This package provides a package
    ;; for ..."; it is reworded to drop the redundancy.
    (description
     "This package provides tools for the summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
7475
;; genomationData: experimental data sets for use with the genomation
;; package.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/experiment/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/experiment/src/contrib/"
                           "genomationData_" version ".tar.gz"))
       (sha256
        (base32 "16dqwb7wx1igx77zdbcskx5m1hs4g4gp2hl56zzm70hcagnlkz8y"))))
    ;; Upstream spells the name in mixed case; record it as the other
    ;; mixed-case Bioconductor packages in this file do.
    (properties
     `((upstream-name . "genomationData")))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
7503
;; org.Hs.eg.db: genome wide annotation for Human.
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "org.Hs.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "19mg64pw8zcvb9yxzzyf7caz1kvdrkfsj1hd84bzq7crrh8kc4y6"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "org.Hs.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome wide annotation for Human")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
    (license license:artistic2.0)))
7529
;; org.Ce.eg.db: genome wide annotation for Worm.
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "org.Ce.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "12llfzrrc09kj2wzbisdspv38qzkzgpsbn8kv7qkwg746k3pq436"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "org.Ce.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Ce.eg.db/")
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (license license:artistic2.0)))
7555
;; org.Dm.eg.db: genome wide annotation for Fly.
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "org.Dm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1vzbphbrh1cf7xi5cksia9xy9a9l42js2z2qsajvjxvddiphrb7j"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "org.Dm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Dm.eg.db/")
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (license license:artistic2.0)))
7581
;; org.Mm.eg.db: genome wide annotation for Mouse.
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "org.Mm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1lykjqjaf01fmgg3cvfcvwd5xjq6zc5vbxnm5r4l32fzvl89q50c"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "org.Mm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Mm.eg.db/")
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (license license:artistic2.0)))
7607
;; seqLogo, fetched from Bioconductor: sequence logos for DNA sequence
;; motifs.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "1ql4q4vx0j61a893dqc3c8zxmgs8sqhy3j1qhyfdvbd01vw9w1kq"))))
    ;; Upstream spelling of the package name.
    (properties
     '((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
7628
;; BSgenome.Hsapiens.UCSC.hg19: Homo sapiens genome sequences (UCSC hg19).
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.UCSC.hg19_"
                           version ".tar.gz"))
       (sha256
        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (license license:artistic2.0)))
7659
;; BSgenome.Mmusculus.UCSC.mm9: Mus musculus genome sequences (UCSC mm9).
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm9_"
                           version ".tar.gz"))
       (sha256
        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (license license:artistic2.0)))
7690
;; BSgenome.Mmusculus.UCSC.mm10: Mus musculus genome sequences (UCSC mm10).
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm10_"
                           version ".tar.gz"))
       (sha256
        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (license license:artistic2.0)))
7722
;; TxDb.Mmusculus.UCSC.mm10.knownGene: TxDb annotation object for Mouse
;; (UCSC mm10, knownGene track).
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
    ;; Upstream spelling of the package name.
    (properties
     '((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (license license:artistic2.0)))
7757
;; BSgenome.Celegans.UCSC.ce6: Caenorhabditis elegans genome sequences
;; (UCSC ce6).
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce6_"
                           version ".tar.gz"))
       (sha256
        (base32 "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
7789
;; BSgenome.Celegans.UCSC.ce10: Caenorhabditis elegans genome sequences
;; (UCSC ce10).
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce10_"
                           version ".tar.gz"))
       (sha256
        (base32 "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
7821
;; BSgenome.Dmelanogaster.UCSC.dm3: Drosophila melanogaster genome
;; sequences (UCSC dm3).
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable for this tarball: it is kept
       ;; below "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm3_"
                           version ".tar.gz"))
       (sha256
        (base32 "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    ;; Upstream spelling of the package name.
    (properties '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (license license:artistic2.0)))
7853
;; Bioconductor package "motifRG": discriminative motif discovery.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32
                "193zl2rlzwxv9p9q5i7rilj3w05ndqfyp9bdpvagp5s5cin4hf44"))))
    (build-system r-build-system)
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "motifRG")))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (home-page "https://bioconductor.org/packages/motifRG")
    (license license:artistic2.0)))
7880
;; CRAN package "qtl" (R/qtl): analysis of QTL experiments.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.41-6")
    (source
     (origin
       (method url-fetch)
       ;; Build the download location with cran-uri, like the other CRAN
       ;; packages in this file (r-wgcna, r-gkmsvm), instead of spelling
       ;; out the mirror URL by hand.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "067az4v432zxp6lxck8d7vlh9w4r13r0mvw5zsglyaqwsh3d9sad"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
7905
;; Bioconductor package "zlibbioc": bundled zlib for R packages.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32
         "1zr9hbh55hglfpy15cpxwmddxblhyb0an15953l3rbhmlh2vpy92"))))
    (build-system r-build-system)
    (properties '((upstream-name . "zlibbioc")))
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (license license:artistic2.0)))
7924
;; R4RNA: RNA secondary structure analysis, fetched from e-rna.org.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.e-rna.org/r-chie/"
                                  "files/R4RNA_" version ".tar.gz"))
              (sha256
               (base32
                "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (license license:gpl3+)))
7947
;; Bioconductor package "Rhtslib": HTSlib packaged for use from R.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "1dw3p44bfr0m7w39ckc2k37sjcp1zz0b9g12mr8am15jaj6v0q2j"))))
    (build-system r-build-system)
    ;; The upstream name is capitalised, unlike the Guix package name.
    (properties '((upstream-name . "Rhtslib")))
    (native-inputs
     `(("autoconf" ,autoconf)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (home-page "https://github.com/nhayden/Rhtslib")
    (license license:lgpl2.0+)))
7974
;; Bioconductor package "bamsignals": read counts from indexed bam files.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "15id6mkj95skb4kfafvfs2j7ylydal60c3pspcl7llhwpq6vcqvl"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (home-page "https://bioconductor.org/packages/bamsignals")
    (license license:gpl2+)))
8004
;; RCAS R library, built from the tagged GitHub release archive.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.3.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/RCAS/"
                           "archive/v" version ".tar.gz"))
       ;; Give the downloaded archive a name that identifies the package.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1qgc7vi6fpzl440yg7jhiycg5q336kd4pxqzx10yx2zcq3bq3msg"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("r-testthat" ,r-testthat)
       ;; knitr checks for "pandoc-citeproc" in the PATH while building
       ;; the vignettes.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)))
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-biomart" ,r-biomart)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-topgo" ,r-topgo)
       ("r-dt" ,r-dt)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-motifrg" ,r-motifrg)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8052
;; Web front end for RCAS, built from the upstream release tarball.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/releases/"
                           "download/v" version
                           "/rcas-web-" version ".tar.gz"))
       (sha256
        (base32
         "1p16frfys41a8yaa4gkm457nzkqhqs2pc3lkac0ds457w9w5j1gm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Wrap the installed "rcas-web" program so that it runs with the
         ;; Guile search paths of guile-json and guile-redis and with the
         ;; R_LIBS_SITE value that is in effect during the build.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((site "/share/guile/site/2.2")
                    (guile-path (string-append
                                 (assoc-ref inputs "guile-json") site ":"
                                 (assoc-ref inputs "guile-redis") site))
                    (program (string-append (assoc-ref outputs "out")
                                            "/bin/rcas-web")))
               (wrap-program program
                 (list "GUILE_LOAD_PATH" ":" '= (list guile-path))
                 (list "GUILE_LOAD_COMPILED_PATH" ":" '= (list guile-path))
                 (list "R_LIBS_SITE" ":" '= (list (getenv "R_LIBS_SITE")))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile-json)
       ("guile-redis" ,guile2.2-redis)))
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (license license:agpl3+)))
8096
;; Bioconductor package "MutationalPatterns".
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "0sqbrswg8ylkjb9q3vqcb5ggwixynwj6hyv2n4sk7snyk61z3fq9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; Note that this label is shorter than the package variable name.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (license license:expat)))
8134
;; CRAN package "WGCNA": weighted correlation network analysis.
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.61")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "WGCNA" version))
              (sha256
               (base32
                "1vrc2k33a196hrrl7k0z534fp96vv0shmigcr65ny1q0v6lq0h6i"))))
    (build-system r-build-system)
    ;; The upstream name is upper-case, unlike the Guix package name.
    (properties '((upstream-name . "WGCNA")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-doparallel" ,r-doparallel)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-fastcluster" ,r-fastcluster)
       ("r-foreach" ,r-foreach)
       ("r-go-db" ,r-go-db)
       ("r-hmisc" ,r-hmisc)
       ("r-impute" ,r-impute)
       ("r-rcpp" ,r-rcpp)
       ("r-robust" ,r-robust)
       ("r-survival" ,r-survival)
       ("r-matrixstats" ,r-matrixstats)
       ("r-preprocesscore" ,r-preprocesscore)))
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (license license:gpl2+)))
8173
;; ChIPKernels, built from a pinned commit of the upstream Git repository.
;; The version string combines "1.1", the packaging revision and the first
;; nine characters of the commit hash.
(define-public r-chipkernels
  (let ((revision "1")
        (commit "c9cfcacb626b1221094fb3490ea7bac0fd625372"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ManuSetty/ChIPKernels.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (license license:gpl2+))))
8209
;; SeqGL, built from the tagged GitHub archive.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ManuSetty/SeqGL/archive/"
                                  version ".tar.gz"))
              ;; Give the downloaded archive a name that identifies the
              ;; package.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (home-page "https://github.com/ManuSetty/SeqGL")
    (license license:gpl2+)))
8239
;; CRAN package "gkmSVM": gapped-kmer support vector machine.
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.71.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gkmSVM" version))
              (sha256
               (base32
                "1zpxgxmf2nd5j5wn00ps6kfxr8wxh7d1swr1rr4spq7sj5z5z0k0"))))
    (build-system r-build-system)
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "gkmSVM")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (home-page "http://cran.r-project.org/web/packages/gkmSVM")
    (license license:gpl2+)))
8272
;; Bioconductor package "tximport": import transcript-level estimates.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32
         "1gyqcm91hxg1kgjqcz2qw1n56yp9pymjzs50rwcpb2893dr8sp2h"))))
    (build-system r-build-system)
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (home-page "https://bioconductor.org/packages/tximport")
    (license license:gpl2+)))
8294
;; Bioconductor package "rhdf5": HDF5 interface for R.  The source ships a
;; bundled HDF5 archive which is unpacked in a dedicated build phase.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5" version))
              (sha256
               (base32
                "145858qg1xan6imxcbprzq3yn3mdf532aahdr6cibvdjg47hs4c1"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-smallhdf5
           (lambda _
             ;; Unpack the bundled HDF5 sources ourselves.  The phase fails
             ;; (returns #f) when tar exits with a non-zero status instead
             ;; of silently carrying on with a missing "src/hdf5" tree.
             (and (zero? (system* "tar" "-xzvf"
                                  "src/hdf5source/hdf5small.tgz"
                                  "-C" "src/"))
                  (begin
                    ;; Drop the Makevars lines that would unpack and move
                    ;; the archive, since that has just been done above.
                    (substitute* "src/Makevars"
                      (("^.*cd hdf5source &&.*$") "")
                      (("^.*gunzip -dc hdf5small.tgz.*$") "")
                      (("^.*rm -rf hdf5.*$") "")
                      (("^.*mv hdf5source/hdf5 ..*$") ""))
                    ;; Do not refer to "mv" by absolute file name.
                    (substitute* "src/hdf5/configure"
                      (("/bin/mv") "mv"))
                    #t)))))))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8337
;; Bioconductor package "AnnotationFilter".
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32
         "04zf864c1fvdlaay2r5cn30fc1n5i3czh31fs62qlrvs61wjiscs"))))
    (build-system r-build-system)
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "AnnotationFilter")))
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (license license:artistic2.0)))
8361
;; EMBOSS, built from the upstream release tarball.  The build scripts are
;; patched and then regenerated with autoreconf.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32
         "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER checks look for libgd, libpng and zlib but
             ;; assume all three live under one prefix.  Replace the check
             ;; with the settings it would have produced.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                (string-append
                 "LIBS=\"$LIBS -lgd -lpng -lz -lm\"\n"
                 "AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])\n"
                 "AM_CONDITIONAL(AMPNG, true)")))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; There is no Internet connection at build time, so checking
             ;; for updates cannot work.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate the build system after the two edits above.
         (add-after 'disable-update-check 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (home-page "http://emboss.sourceforge.net")
    (license license:gpl2+)))
8424
;; BITS, built from a pinned commit of the upstream Git repository.
(define-public bits
  (let ((commit "3cc4567896d9d6442923da944beb704750a08d2d")
        (revision "1"))
    (package
      (name "bits")
      ;; Upstream calls this version 2.13.0, although no release archives
      ;; have been published as yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           ;; No configure step is run for this package.
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Blank out every Makefile line mentioning a "_cuda" item,
               ;; then strip the trailing backslash after
               ;; "bits_test_intersections".
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (copy-recursively "bin" bin))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (home-page "https://github.com/arq5x/bits")
      (license license:gpl2))))
8470
;; Piranha, built from a pinned commit of the upstream Git repository
;; together with a pinned checkout of the smithlab_cpp sources.
(define-public piranha
  ;; There is no release tarball for the latest version.  The latest commit
  ;; is older than one year at the time of this writing.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/piranha.git")
                      (commit commit)))
                ;; Name the checkout after the package and version, as the
                ;; other Git-based origins in this file do.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the "smithlab-cpp" input into the source
           ;; tree under src/smithlab_cpp/.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (for-each (lambda (file)
                           (install-file file "./src/smithlab_cpp/"))
                         (find-files (assoc-ref inputs "smithlab-cpp")))
               #t))
           ;; After the regular install phase, copy everything found in the
           ;; local "bin" directory to the output's bin directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files "bin" ".*")))
               #t)))
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools") "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
8536
;; PePr, fetched from PyPI and built with Python 2.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Let pypi-uri build the download location, as multiqc in
              ;; this file does, instead of hard-coding the PyPI host and
              ;; directory layout.  The upstream project name is "PePr".
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
8565
;; filevercmp, built from the GitHub archive of a pinned commit.  The
;; version string is "0-1." followed by the first seven characters of the
;; commit hash.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Report success explicitly, like the other custom phases
                 ;; in this file, rather than returning whatever
                 ;; install-file happens to return.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
8593
;; MultiQC, fetched from PyPI.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32
                "0fx1sx53znbgzfhbbiyd8j6cg5llpcsl5q5c45jy2c81d12piqfd"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-enum34" ,python-enum34)
       ("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (home-page "http://multiqc.info")
    (license license:gpl3+)))
8627
;; Bioconductor package "chipseq".
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "1ymcq77krwjzrkzzcw7i9909cmkqa7c0675z9wzvrrk81hgdssfq"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (home-page "https://bioconductor.org/packages/chipseq")
    (license license:artistic2.0)))
8653
;; Bioconductor experiment-data package "CopyhelpeR".
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              ;; The tarball is served from the "data/experiment"
              ;; directory of the Bioconductor release area.
              (uri (string-append
                    "https://bioconductor.org/packages/release/data/"
                    "experiment/src/contrib/CopyhelpeR_" version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (build-system r-build-system)
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "CopyhelpeR")))
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (license license:gpl2)))
8679
;; Bioconductor package "CopywriteR".
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "17fy2lc5yf3nh6v077kv87h53n263hqz2540lzrl0vjiqrl2plca"))))
    (build-system r-build-system)
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "CopywriteR")))
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (license license:gpl2)))
8719
;; methylKit from Bioconductor.  zlib is a plain (non-propagated) input; every
;; R-level dependency is propagated.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32 "0h53w2mrjrg2n0ndi12k9j6cwclgwcgpy25nz7nyj971aisw02xn"))))
    (build-system r-build-system)
    (properties '((upstream-name . "methylKit")))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
8765
;; Surrogate variable analysis (sva) from Bioconductor.  The package only
;; depends on other R packages, all of which are propagated.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "sva" version))
       (sha256
        (base32
         "0q5xb68wfcnchy8rkv5ma67pmz1i91lsnvmwmj8f1c3w4xan3pgw"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiments. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
8795
;; seqminer from CRAN.  zlib is its only declared input and is not
;; propagated.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "6.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "seqminer" version))
       (sha256
        (base32 "057j1l6dip35l1aivilapl2zv9db677b3di2pb3sfgq2sxg0ps3l"))))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Upstream accepts any version of the GPL, hence both entries.
    (license (list license:gpl2+ license:gpl3+))))
8817
;; rareMETALS2 is fetched from genome.sph.umich.edu, so the download URL is
;; written out by hand.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://genome.sph.umich.edu/w/images/b/b7/"
                           "RareMETALS2_" version ".tar.gz"))
       (sha256
        (base32 "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (build-system r-build-system)
    (properties '((upstream-name . "RareMETALS2")))
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
8849
;; MALDIquant from CRAN.  It declares no inputs beyond what the R build
;; system supplies.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.16.4")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "MALDIquant" version))
       (sha256
        (base32
         "1pmhsfvd45a44xdiml4zx3zd5fhygqyziqvygahkk9yibnyhv4cv"))))
    (properties `((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    ;; CRAN is reachable over TLS; link to the encrypted home page.
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
8875
;; ProtGenerics from Bioconductor; no inputs are declared.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ProtGenerics" version))
       (sha256
        (base32 "16ijp50448wnabp43klx943rhdvh7x45hvy7cnpq1s4dckxhhyni"))))
    (build-system r-build-system)
    (properties '((upstream-name . "ProtGenerics")))
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
8895
;; mzR ships its own copy of Boost under src/boost.  The origin snippet
;; deletes that copy, and the 'use-system-boost phase rewrites src/Makevars so
;; the libraries from the "boost" input are used instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzR" version))
       (sha256
        (base32 "1x3gp30sfxz2v3k3swih9kff9b2rvk7hzhnlkp6ywlnn2wgb0q8c"))
       (modules '((guix build utils)))
       ;; Remove the bundled Boost sources.
       (snippet
        '(begin
           (delete-file-recursively "src/boost")
           #t))))
    (build-system r-build-system)
    (properties '((upstream-name . "mzR")))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             (substitute* "src/Makevars"
               ;; Strip everything from "./boost/libs" to the end of the
               ;; line; that directory no longer exists after the snippet.
               (("\\./boost/libs.*") "")
               ;; Add an ARCH_LIBS definition naming the Boost libraries
               ;; right after each "ARCH_OBJS=" match.
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nARCH_LIBS=-lboost_system -lboost_regex "
                               "-lboost_iostreams -lboost_thread "
                               "-lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("boost" ,boost)             ; replaces the bundled boost sources
       ("netcdf" ,netcdf)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
8945
;; affyio from Bioconductor.  zlib is a plain input while r-zlibbioc is
;; propagated.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affyio" version))
       (sha256
        (base32 "1pzzp3d3dbmyf34gvivfiprkpscn36rgvhrq853a1d3avcwr5ak9"))))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
8969
;; affy from Bioconductor.  It builds on r-affyio and other R packages, all
;; of which are propagated.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affy" version))
       (sha256
        (base32
         "0jmbkimma5ffsdkk3xp03g4lpz84gd95nkqakif2nqq6wmx0syrj"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    ;; "Affymetrix" is a proper noun; r-affyio's synopsis capitalises it too.
    (synopsis "Methods for Affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
8995
;; vsn from Bioconductor; every dependency is an R package and is
;; propagated.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "vsn" version))
       (sha256
        (base32 "18y62phzirj75gg6v5l41jwybmk23ia6w7qhch0kxc4bl2rysw6j"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page
     "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9029
;; mzID from Bioconductor; all dependencies are propagated R packages.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzID" version))
       (sha256
        (base32 "0yk70dka56zd8w62f03ggx3mandj91gfa767h9ajj0sd3mjmfqb9"))))
    (build-system r-build-system)
    (properties '((upstream-name . "mzID")))
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9059
;; pcaMethods from Bioconductor; all dependencies are propagated R packages.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.70.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "pcaMethods" version))
       (sha256
        (base32 "0ii235g0x0492kh8cfrf28ni0b6vd6fh7kizkqmczzqggd6b1bk8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "pcaMethods")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9089
;; MSnbase from Bioconductor; all dependencies are propagated R packages.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnbase" version))
       (sha256
        (base32 "0dqfimljhrx3gac8d1k72gppx27lz8yckyb12v4604nbviw7xd3r"))))
    (build-system r-build-system)
    (properties '((upstream-name . "MSnbase")))
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9130
;; MSnID from Bioconductor; all dependencies are propagated R packages.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.11.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32
         "1vi4ngwbayrv2jkfb4pbmdp37xn04y07rh1jcklqfh0fcrm1jdig"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9169
(define-public r-seurat
  ;; Upstream only publishes source releases for new x.0 versions; anything
  ;; newer is distributed as pre-built binaries only.  When this was written
  ;; the latest binary release was 1.4.0.12, which corresponds to the commit
  ;; pinned below.
  (let ((commit "fccb77d1452c35ee47e47ebf8e87bddb59f3b08d")
        (revision "1"))
    (package
      (name "r-seurat")
      (version (string-append "1.4.0.12-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/satijalab/seurat")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "101wq3aqrdmbfi3lqmq4iivk9iwbf10d4z216ss25hf7n9091cyl"))
         ;; The checkout ships a pre-built jar; remove it so that it gets
         ;; rebuilt from source by the 'build-jar phase.
         (snippet
          '(begin
             (delete-file "inst/java/ModularityOptimizer.jar")
             #t))))
      (build-system r-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Compile the Java sources found under "java" and package the
           ;; resulting classes as the jar deleted by the origin snippet.
           (add-after 'unpack 'build-jar
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((class-dir "tmp-classes"))
                 (setenv "JAVA_HOME" (assoc-ref inputs "jdk"))
                 (mkdir class-dir)
                 ;; The phase succeeds only if both javac and jar exit with
                 ;; status zero.
                 (and (zero? (apply system* "javac" "-d" class-dir
                                    (find-files "java" "\\.java$")))
                      (zero? (system* "jar"
                                      "-cf" "inst/java/ModularityOptimizer.jar"
                                      "-C" class-dir ".")))))))))
      (native-inputs
       `(("jdk" ,icedtea "jdk")))
      (propagated-inputs
       `(("r-ape" ,r-ape)
         ("r-caret" ,r-caret)
         ("r-cowplot" ,r-cowplot)
         ("r-dplyr" ,r-dplyr)
         ("r-fastica" ,r-fastica)
         ("r-fnn" ,r-fnn)
         ("r-fpc" ,r-fpc)
         ("r-gdata" ,r-gdata)
         ("r-ggplot2" ,r-ggplot2)
         ("r-gplots" ,r-gplots)
         ("r-gridextra" ,r-gridextra)
         ("r-igraph" ,r-igraph)
         ("r-irlba" ,r-irlba)
         ("r-lars" ,r-lars)
         ("r-mixtools" ,r-mixtools)
         ("r-pbapply" ,r-pbapply)
         ("r-plyr" ,r-plyr)
         ("r-ranger" ,r-ranger)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-rcpp" ,r-rcpp)
         ("r-rcppeigen" ,r-rcppeigen)
         ("r-rcppprogress" ,r-rcppprogress)
         ("r-reshape2" ,r-reshape2)
         ("r-rocr" ,r-rocr)
         ("r-rtsne" ,r-rtsne)
         ("r-stringr" ,r-stringr)
         ("r-tclust" ,r-tclust)
         ("r-tsne" ,r-tsne)
         ("r-vgam" ,r-vgam)))
      (home-page "http://www.satijalab.org/seurat")
      (synopsis "Seurat is an R toolkit for single cell genomics")
      (description
       "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
      (license license:gpl3))))
9248
;; aroma.light from Bioconductor.  The upstream name contains a dot, which is
;; written as a dash in the Guix package name.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "aroma.light" version))
       (sha256
        (base32 "0crnk6851jwypqr5l5jcbbay0vi5vvdjyisaf6z2d69c39wmr6sc"))))
    (build-system r-build-system)
    (properties '((upstream-name . "aroma.light")))
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9275
;; DESeq from Bioconductor; all dependencies are propagated R packages.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq" version))
       (sha256
        (base32 "0mn5w3cy16iwwk8zxs7za6aa6cnrca75z0g45zd5zh1py5d7nfv9"))))
    (build-system r-build-system)
    (properties '((upstream-name . "DESeq")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www-huber.embl.de/users/anders/DESeq")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9306
;; EDASeq from Bioconductor; all dependencies are propagated R packages.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "EDASeq" version))
       (sha256
        (base32 "07zm89zcivyn2261aq9grqmly8ji482kr9h9dyfknfdfrpv7jpwv"))))
    (build-system r-build-system)
    (properties '((upstream-name . "EDASeq")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9344
;; interactiveDisplayBase from Bioconductor; both dependencies are
;; propagated R packages.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "interactiveDisplayBase" version))
       (sha256
        (base32 "01yb945jqqimwjgriza6yy4dnp303cdirxrhl4hjyprfdlmnz5p5"))))
    (build-system r-build-system)
    (properties '((upstream-name . "interactiveDisplayBase")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9368
;; AnnotationHub from Bioconductor; all dependencies are propagated R
;; packages.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationHub" version))
       (sha256
        (base32 "1arfka3czw8hkv6n2d85bgibq81s2rgkwhmpaxzhy6nw39vv7y8b"))))
    (build-system r-build-system)
    (properties '((upstream-name . "AnnotationHub")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-curl" ,r-curl)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9403
;; fastseg from Bioconductor; all dependencies are propagated R packages.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "fastseg" version))
       (sha256
        (base32 "0dd7nr3klwz9ailwshnbynhd62lwb8zbbpj6jf3igpb94yi6x2jp"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page
     "http://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9433
;; KEGGREST from Bioconductor; all dependencies are propagated R packages.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "1i3i88lj57wvpgjf75a23msgfsjv8pr2b4j1faga276p4fsblkhj"))))
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
9457
;; gage from Bioconductor; all dependencies are propagated R packages.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32
         "1r14p88q3y736pkqm4pdimf1izy1xy3xgivmj3cr4dv65kjny1zk"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page "http://www.biomedcentral.com/1471-2105/10/161")
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
9486
;; GenomicFiles from Bioconductor; all dependencies are propagated R
;; packages.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFiles" version))
       (sha256
        (base32 "0r0wmrs5jycf1kckhnc2sgjmp336srlcjdkpbb1ymm7kazdd0s9n"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomicFiles")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
9519
;; ComplexHeatmap from Bioconductor; all dependencies are propagated R
;; packages.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "1.17.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ComplexHeatmap" version))
       (sha256
        (base32 "1x6kp55iqqsd8bhdl3qch95nfiy2y46ldbbsx1sj1v8f0b0ywwcy"))))
    (build-system r-build-system)
    (properties '((upstream-name . "ComplexHeatmap")))
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
9549
;; Bioconductor package "DirichletMultinomial".  Besides its R dependencies
;; it takes the GSL library as a regular (non-propagated) input.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DirichletMultinomial" version))
              (sha256
               (base32
                "1c4s6x0qm20556grcd1xys9kkpnlzpasaai474malwcg6qvgi4x1"))))
    (properties '((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
9578
;; Bioconductor package "AnnotationFilter": filter classes for annotation
;; resources.
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32
         "04zf864c1fvdlaay2r5cn30fc1n5i3czh31fs62qlrvs61wjiscs"))))
    (properties '((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
9602
;; Bioconductor package "ensembldb": creation and use of Ensembl-based
;; annotation databases.  The upstream name is already lower case, so no
;; 'upstream-name' property is set.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ensembldb" version))
              (sha256
               (base32
                "1w0lca3ws5j770bmls91cn93lznvv2pc8s42nybdzz3vdxjvb4m1"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; The wording below corrects subject/verb agreement and the
    ;; "to retrieve" constructions of the previous text; the content is
    ;; otherwise unchanged.
    (description
     "This package provides functions to create and use transcript-centric
annotation databases/packages. The annotations for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data are
similar to those of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
that allows retrieving annotations for specific entries like genes encoded on
a chromosome region or transcript models of lincRNA genes.")
    ;; No license version is specified, hence the "or later" variant.
    (license license:lgpl3+)))
9646
;; Bioconductor package "OrganismDbi": one interface over several annotation
;; packages.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "OrganismDbi" version))
              (sha256
               (base32
                "0yxvhwn0m53wfwp0zi81x96argdf7cf1lpymc2as51apvfcnjdl8"))))
    (properties '((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; "a select methods" in the previous text mixed singular and plural;
    ;; the sentence is also split with commas for readability.
    (description "The package enables a simple unified interface to several
annotation packages, each of which has its own schema, by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
9678
;; Bioconductor package "biovizBase": shared utilities and colour schemes for
;; genomic data visualization.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biovizBase" version))
              (sha256
               (base32
                "14l4vhj0a4ssr9m9zdzz3qpd4qw1mhgq5bmxq7jhrq3j9kmd6i2f"))))
    (properties '((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
9719
;; Bioconductor package "ggbio": grammar-of-graphics plots for genomic data.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ggbio" version))
              (sha256
               (base32
                "1bqxfqy0hff87ax92z4lfbjz01ndrz7x8pzm6dlkdmi52p30krm9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
9771
;; CRAN package "gProfileR" (fetched with cran-uri, unlike the Bioconductor
;; packages around it): client for the g:Profiler toolkit.
(define-public r-gprofiler
  (package
    (name "r-gprofiler")
    (version "0.6.1")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gProfileR" version))
              (sha256
               (base32
                "1qix15d0wa9nspdclcawml94mng4qmr2jciv7d24py315wfsvv8p"))))
    (properties '((upstream-name . "gProfileR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-plyr" ,r-plyr)
       ("r-rcurl" ,r-rcurl)))
    ;; Use the HTTPS form of the CRAN package page.
    (home-page "https://cran.r-project.org/web/packages/gProfileR/")
    (synopsis "Interface to the g:Profiler toolkit")
    (description
     "This package provides tools for functional enrichment analysis,
gene identifier conversion and mapping homologous genes across related
organisms via the @code{g:Profiler} toolkit.")
    (license license:gpl2+)))
9795
;; Bioconductor package "gQTLBase": storage and interrogation of QTL
;; archives.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLBase" version))
              (sha256
               (base32
                "1756vfcj2dkkgcmfkkg7qdaig36dv9gfvpypn9rbrky56wm1p035"))))
    (properties '((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
9830
;; Bioconductor package "snpStats": classes and methods for SNP association
;; studies.  zlib is a regular input; the R dependencies are propagated.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "snpStats" version))
              (sha256
               (base32
                "1x9qwynh2hwl24vq02naf4mchpch7xi2pkdrlgw896k28kx0lvir"))))
    (properties '((upstream-name . "snpStats")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
9857
;; Bioconductor annotation package "org.Hs.eg.db": genome-wide annotation
;; for Human keyed by Entrez Gene identifiers.
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.4.2")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".  The URL is
       ;; spelled out by hand and uses HTTPS, matching the home page below.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Hs.eg.db_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0izzmas99j64sc4x8pxi09xw0ra1941kjrsl8sjxl0cjw1d2b32z"))))
    (properties '((upstream-name . "org.Hs.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome-wide annotation for Human")
    (description
     "This package contains genome-wide annotations for Human, primarily based
on mapping using Entrez Gene identifiers.")
    (license license:artistic2.0)))
9884
;; Bioconductor annotation package "Homo.sapiens": provides the Homo.sapiens
;; object that gives access to several related annotation packages.
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".  The URL is
       ;; spelled out by hand and uses HTTPS, matching the home page below.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "Homo.sapiens_"
                           version ".tar.gz"))
       (sha256
        (base32
         "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    (properties '((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene" ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
9916
;; Bioconductor package "erma": selected elements of the epigenomics road
;; map.
(define-public r-erma
  (package
    (name "r-erma")
    (version "0.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "erma" version))
              (sha256
               (base32
                "0gcfs9g8vvdv5vmq9b21kd8sq5mizjj49nfzd4in9zvp4b9v7x1g"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
9952
;; Bioconductor package "ldblock": data structures for linkage
;; disequilibrium measures.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ldblock" version))
              (sha256
               (base32
                "18nfsixh6d2wfrb9laqsgly5w1frzihhak683k0p8fdf51h4aqba"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-erma" ,r-erma)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-go-db" ,r-go-db)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
9983
;; Bioconductor package "gQTLstats": analysis tools for eQTL and allied
;; studies.
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLstats" version))
              (sha256
               (base32
                "1cbdqawxzgna8rrgj3siph5sw4d2pb57qc0gn6ibfkhyk45f8gdv"))))
    (properties '((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-iranges" ,r-iranges)
       ("r-ldblock" ,r-ldblock)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10037
;; Bioconductor package "Gviz": plotting of data and annotation along genomic
;; coordinates.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Gviz" version))
              (sha256
               (base32
                "1lrw65a8426wpxw975wjcaiacpp6fqa00nif1yxigyankbfs23c8"))))
    (properties '((upstream-name . "Gviz")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    (description
     "Genomic data analyses requires integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10083
;; Bioconductor package "gwascat": tools for the EMBL-EBI GWAS catalog.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gwascat" version))
              (sha256
               (base32
                "0n5x5i5v6a8wpn5mxmlpkl34b4kyypmymiwww6g61zch7xqrgywi"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbio" ,r-ggbio)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlstats" ,r-gqtlstats)
       ("r-graph" ,r-graph)
       ("r-gviz" ,r-gviz)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10123
;; Bioconductor package "Sushi": genomic visualizations for multi-panel
;; figures.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32
         "0axaqm480z8d0b2ldgxwm0swava1p4irc62bpl17p2k8k78g687g"))))
    (properties '((upstream-name . "Sushi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10145
;; Bioconductor package "FitHiC": confidence estimates for intra-chromosomal
;; contact maps.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32
         "12ylhrppi051m7nqsgq95kzd9g9wmp34i0zzfi55cjqawlpx7c6n"))))
    (properties '((upstream-name . "FitHiC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10169
;; Bioconductor package "HiTC": analysis of high-throughput chromosome
;; conformation capture data.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32
         "0288xa1jy6nzvz2ha07csmp6dirjw5r7p9vy69q2wsbyzr02ymkp"))))
    (properties '((upstream-name . "HiTC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10198
;; Bioconductor package "qvalue": q-value and local FDR estimation from a
;; list of p-values.
(define-public r-qvalue
  (package
    (name "r-qvalue")
    (version "2.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "qvalue" version))
              (sha256
               (base32
                "1rd9rnf16kh8wc076kahd9hsb9rfwsbzmz3kjmp0pj6rbiq0051i"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-reshape2" ,r-reshape2)))
    ;; Link to the HTTPS form of the GitHub project page.
    (home-page "https://github.com/jdstorey/qvalue")
    (synopsis "Q-value estimation for false discovery rate control")
    (description
     "This package takes a list of p-values resulting from the simultaneous
testing of many hypotheses and estimates their q-values and local @dfn{false
discovery rate} (FDR) values. The q-value of a test measures the proportion
of false positives incurred when that particular test is called significant.
The local FDR measures the posterior probability the null hypothesis is true
given the test's p-value. Various plots are automatically generated, allowing
one to make sensible significance cut-offs. The software can be applied to
problems in genomics, brain imaging, astrophysics, and data mining.")
    ;; Any version of the LGPL.
    (license license:lgpl3+)))
10227
;; Variant of htslib built from a pinned commit of the lomereiter/htslib Git
;; repository.  It is bound with plain 'define' (not 'define-public') and is
;; used as the "htslib" input of sambamba.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      ;; "1.3.1-1." followed by the first nine characters of the commit.
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/lomereiter/htslib.git")
                      (commit commit)))
                (file-name (string-append "htslib-" version "-checkout"))
                (sha256
                 (base32
                  "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      (arguments
       ;; Reuse the arguments of htslib, adding one phase to its phases.
       (substitute-keyword-arguments (package-arguments htslib)
         ((#:phases phases)
          `(modify-phases ,phases
             ;; Run autoreconf right after unpacking; the phase succeeds
             ;; when autoreconf exits with status zero.
             (add-after 'unpack 'bootstrap
               (lambda _
                 (zero? (system* "autoreconf" "-vif"))))))))
      ;; autoconf and automake on top of htslib's own native inputs.
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
10255
;; Sambamba: tools for SAM/BAM data, written in D and built with ldc2.  The
;; BioD sources are fetched separately and copied into the build tree, and
;; the bundled htslib/lz4 targets are removed from the Makefile.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.5")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lomereiter/sambamba/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "17076gijd65a3f07zns2gvbgahiz5lriwsa6dq353ss3jl85d8vy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:make-flags
       '("D_COMPILER=ldc2"
         ;; Override "--compiler" flag only.
         "D_FLAGS=--compiler=ldc2 -IBioD -g -d"
         "sambamba-ldmd2-64")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the "biod" input into a local "BioD" directory, which is
         ;; the include path named in D_FLAGS above.
         (add-after 'unpack 'place-biod
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             #t))
         ;; Drop the static htslib and lz4 prerequisites from the Makefile;
         ;; both libraries are provided as inputs instead.
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             (substitute* "Makefile"
               ((" htslib-static lz4-static") ""))
             #t))
         ;; Install the built binary by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (install-file "build/sambamba" bin)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ;; BioD is given as a bare origin pinned to one commit.
       ("biod"
        ,(let ((commit "1248586b54af4bd4dfb28ebfebfc6bf012e7a587"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1m8hi1n7x0ri4l6s9i0x6jg4z4v94xrfdzp7mbizdipfag0m17g3")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    ;; Link to the HTTPS form of the project page.
    (home-page "https://lomereiter.github.io/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
10323
;; Ritornello: control-free ChIP-seq peak caller.  There is no configure
;; step and the binary is installed by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/KlugerLab/"
                           "Ritornello/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "02nik86gq9ljjriv6pamwlmqnfky3ads1fpklx6mc3hx6k40pg38"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Rewrite the <sam.h> includes to <samtools/sam.h> in the two
         ;; headers that reference it.
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             (substitute* '("src/SamStream.h"
                            "src/BufferedGenomeReader.h")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         ;; Copy the built executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (install-file "bin/Ritornello" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
10368
;; Trim Galore!: a wrapper script around Cutadapt.  The zip archive holds
;; plain files, so the package only unpacks it, patches absolute tool paths
;; into the script, and installs the script.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                                  "projects/trim_galore/trim_galore_v"
                                  version ".zip"))
              (sha256
               (base32
                "0b9qdxi4521gsrjvbhgky8g7kry9b5nx3byzaxkgxz7p4k8bn1mn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         ;; The archive contains plain files.
         (replace 'unpack
           (lambda* (#:key source #:allow-other-keys)
             (zero? (system* "unzip" source))))
         (delete 'configure)
         (delete 'build)
         ;; Replace the bare "cutadapt", "gzip" and "gunzip" command names
         ;; in the script with the full paths of the corresponding inputs.
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((cutadapt (string-append (assoc-ref inputs "cutadapt")
                                            "/bin/cutadapt"))
                   (gzip-bin (string-append (assoc-ref inputs "gzip")
                                            "/bin/")))
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '" cutadapt "'"))
                 (("\\| gzip")
                  (string-append "| " gzip-bin "gzip"))
                 (("\"gunzip")
                  (string-append "\"" gzip-bin "gunzip")))
               #t)))
         ;; Install the script into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               (install-file "trim_galore" bin)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("cutadapt" ,cutadapt)))
    (native-inputs
     `(("unzip" ,unzip)))
    ;; Link to the HTTPS form of the project page.
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
10428
;; GESS is a collection of Python 2 files without a build system.  The whole
;; source tree is copied into a site-packages directory, GESS.py is turned
;; into an executable script wrapped with a suitable PYTHONPATH, and a
;; symlink to it is placed in bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python (assoc-ref inputs "python"))
                    (out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    (target (string-append
                             out "/lib/python2.7/site-packages/gess/"))
                    (script (string-append target "GESS.py")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod script #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.  The interpreter is taken from the declared
               ;; "python" input rather than looked up on PATH.
               (substitute* script
                 (("\"\"\"Description:" line)
                  (string-append "#!" python "/bin/python" "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-program script
                 `("PYTHONPATH" ":" prefix (,target ,(getenv "PYTHONPATH"))))
               ;; Expose the wrapped script under bin/.
               (mkdir-p bin)
               (symlink script (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)))
    (home-page "http://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
10489
;; PHYLIP is built from its "src" directory with the Unix makefile.  The
;; makefile's "install" target is passed through #:make-flags, and the custom
;; install phase then copies everything found under ../exe to bin/.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:parallel-build? #f             ; not supported
       #:make-flags '("-f" "Makefile.unx" "install")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The makefile lives in the "src" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (for-each (lambda (program)
                           (install-file program bin))
                         (find-files "../exe" ".*"))
               #t))))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
10526
;; The Integrative Modeling Platform: a C++ library with Python 2 bindings,
;; built with CMake.  The Python modules it needs at run time are propagated.
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     `(;; FIXME: Some tests fail because they produce warnings, others fail
       ;; because the PYTHONPATH does not include the modeller's directory.
       #:tests? #f
       ;; Do not place libraries in an architecture-specific directory.
       #:configure-flags
       (list "-DCMAKE_INSTALL_LIBDIR=lib")))
    ;; SWIG is only a build-time tool, so it is a native input: it must be
    ;; built for the build machine when cross-compiling.
    (native-inputs
     `(("swig" ,swig)))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license. Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
10573
;; TADbit is a Python 2 library built with the Python build system.  Before
;; building, setup.py and a few files it touches are adjusted so that the
;; build works in the build environment and the bash completion file ends up
;; in the package output.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/3DGenomes/TADbit/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1cnfqrl4685zar4nnw94j94nhvl2h29jm448nadqi1h05z6fdk4f"))))
    (build-system python-build-system)
    (arguments
     `(;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completion-dir
                    (string-append (assoc-ref outputs "out")
                                   "/etc/bash_completion.d")))
               ;; setup.py opens these files for writing
               (for-each (lambda (file)
                           (chmod file #o664))
                         '("_pytadbit/_version.py"
                           "README.rst"))

               ;; Don't attempt to install the bash completions to
               ;; the home directory.  The completion file is renamed to
               ;; "tadbit" and setup.py is pointed at the output's
               ;; completion directory instead.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completion-dir "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "http://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
10631
;; kentUtils, reduced to the parts that are free for all uses.  The origin
;; snippet deletes every other directory below "src" and edits the makefiles
;; so that only the remaining libraries and utilities are built.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ENCODE-DCC/kentUtils/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "134aja3k1cj32kbk1nnw0q9gxjb2krr15q6sga8qldzvc0585rmm"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let* ((free '("." ".."
                          "utils" "lib" "inc" "tagStorm"
                          "parasol" "htslib"))
                  (not-free? (lambda (entry)
                               (not (member entry free))))
                  ;; Entries of "src" that are not on the list above.
                  (candidates (map (cut string-append "src/" <>)
                                   (scandir "src" not-free?))))
             ;; Regular files directly below "src" are left alone; only
             ;; directories are removed.
             (for-each (lambda (path)
                         (when (eq? 'directory (stat:type (stat path)))
                           (delete-file-recursively path)))
                       candidates))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; The top-level Makefile refers to /bin/echo; use the echo found
         ;; on PATH instead.
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         ;; Copy the separately fetched samtabix sources into a "samtabix"
         ;; directory of the build tree.
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((samtabix-source (assoc-ref inputs "samtabix")))
               (copy-recursively samtabix-source "samtabix")
               #t)))
         (delete 'configure)
         ;; Copy the contents of the build tree's "bin" directory to the
         ;; output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "bin" bin)
               #t))))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mysql" ,mysql)
       ("openssl" ,openssl)))
    (home-page "http://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
10744
;; F-Seq is packaged from a pinned Git commit and built with Ant.  The
;; bundled jar files are removed from the source; the launcher script is
;; rewritten to use the store copies of java and commons-cli.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      (version (string-append "1.1-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (lib (string-append out "/lib"))
                      (doc (string-append out "/share/doc/f-seq/"))
                      (commons-cli
                       (string-append (assoc-ref inputs "java-commons-cli")
                                      "/share/java/commons-cli.jar")))
                 (mkdir-p out)
                 (mkdir-p doc)
                 ;; Make the launcher script use absolute file names for
                 ;; the java command and the commons-cli jar, and fix
                 ;; REALDIR to the installed bin directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    commons-cli)
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bin "\n")))
                 (install-file "README.txt" doc)
                 (install-file "bin/linux/fseq" bin)
                 (install-file "build~/fseq.jar" lib)
                 (copy-recursively "lib" lib)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
10801
;; Bismark is a set of scripts that need no configuration or compilation;
;; the install phase copies a fixed list of files to bin/ and the
;; documentation to share/doc/bismark.
(define-public bismark
  (package
    (name "bismark")
    (version "0.16.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/FelixKrueger/Bismark/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1204i0pa02ll2jn5pnxypkclnskvv7a2nwh5nxhagmhxk9wfv9sq"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out       (assoc-ref outputs "out"))
                    (bin       (string-append out "/bin"))
                    (docdir    (string-append out "/share/doc/bismark"))
                    ;; Files that go to bin/; the report template is
                    ;; installed next to the scripts.
                    (bin-files '("bismark"
                                 "bismark_genome_preparation"
                                 "bismark_methylation_extractor"
                                 "bismark2bedGraph"
                                 "bismark2report"
                                 "coverage2cytosine"
                                 "deduplicate_bismark"
                                 "bismark_sitrep.tpl"
                                 "bam2nuc"
                                 "bismark2summary"))
                    (doc-files '("Bismark_User_Guide.pdf"
                                 "RELEASE_NOTES.txt")))
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (script)
                           (install-file script bin))
                         bin-files)
               (for-each (lambda (document)
                           (install-file document docdir))
                         doc-files)
               #t))))))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
10864
;; PAML is built with make from its "src" directory.  The Windows
;; executables shipped in the tarball are removed from the source, and the
;; install phase copies a fixed list of tools plus the documentation.
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.  Instead, replace the reference
         ;; to /bin/bash in BFdriver.c and enter the directory that is
         ;; built and installed from.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin    (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               ;; The current directory is "src", so the documentation is
               ;; one level up.
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))