Merge branch 'master' into staging
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;;
11 ;;; This file is part of GNU Guix.
12 ;;;
13 ;;; GNU Guix is free software; you can redistribute it and/or modify it
14 ;;; under the terms of the GNU General Public License as published by
15 ;;; the Free Software Foundation; either version 3 of the License, or (at
16 ;;; your option) any later version.
17 ;;;
18 ;;; GNU Guix is distributed in the hope that it will be useful, but
19 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;;; GNU General Public License for more details.
22 ;;;
23 ;;; You should have received a copy of the GNU General Public License
24 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
25
26 (define-module (gnu packages bioinformatics)
27 #:use-module ((guix licenses) #:prefix license:)
28 #:use-module (guix packages)
29 #:use-module (guix utils)
30 #:use-module (guix download)
31 #:use-module (guix git-download)
32 #:use-module (guix hg-download)
33 #:use-module (guix build-system ant)
34 #:use-module (guix build-system gnu)
35 #:use-module (guix build-system cmake)
36 #:use-module (guix build-system perl)
37 #:use-module (guix build-system python)
38 #:use-module (guix build-system r)
39 #:use-module (guix build-system ruby)
40 #:use-module (guix build-system trivial)
41 #:use-module (gnu packages)
42 #:use-module (gnu packages autotools)
43 #:use-module (gnu packages algebra)
44 #:use-module (gnu packages base)
45 #:use-module (gnu packages bash)
46 #:use-module (gnu packages bison)
47 #:use-module (gnu packages boost)
48 #:use-module (gnu packages compression)
49 #:use-module (gnu packages cpio)
50 #:use-module (gnu packages curl)
51 #:use-module (gnu packages documentation)
52 #:use-module (gnu packages datastructures)
53 #:use-module (gnu packages file)
54 #:use-module (gnu packages flex)
55 #:use-module (gnu packages gawk)
56 #:use-module (gnu packages gcc)
57 #:use-module (gnu packages gd)
58 #:use-module (gnu packages gtk)
59 #:use-module (gnu packages glib)
60 #:use-module (gnu packages groff)
61 #:use-module (gnu packages guile)
62 #:use-module (gnu packages haskell)
63 #:use-module (gnu packages image)
64 #:use-module (gnu packages imagemagick)
65 #:use-module (gnu packages java)
66 #:use-module (gnu packages ldc)
67 #:use-module (gnu packages linux)
68 #:use-module (gnu packages logging)
69 #:use-module (gnu packages machine-learning)
70 #:use-module (gnu packages man)
71 #:use-module (gnu packages maths)
72 #:use-module (gnu packages mpi)
73 #:use-module (gnu packages ncurses)
74 #:use-module (gnu packages pcre)
75 #:use-module (gnu packages parallel)
76 #:use-module (gnu packages pdf)
77 #:use-module (gnu packages perl)
78 #:use-module (gnu packages pkg-config)
79 #:use-module (gnu packages popt)
80 #:use-module (gnu packages protobuf)
81 #:use-module (gnu packages python)
82 #:use-module (gnu packages readline)
83 #:use-module (gnu packages ruby)
84 #:use-module (gnu packages serialization)
85 #:use-module (gnu packages statistics)
86 #:use-module (gnu packages tbb)
87 #:use-module (gnu packages tex)
88 #:use-module (gnu packages texinfo)
89 #:use-module (gnu packages textutils)
90 #:use-module (gnu packages time)
91 #:use-module (gnu packages tls)
92 #:use-module (gnu packages vim)
93 #:use-module (gnu packages web)
94 #:use-module (gnu packages xml)
95 #:use-module (gnu packages xorg)
96 #:use-module (gnu packages zip)
97 #:use-module (srfi srfi-1))
98
;; R package "ape", fetched from CRAN and built with the R build system.
(define-public r-ape
  (package
    (name "r-ape")
    (version "4.1")
    (source
     (origin
       (method url-fetch)
       ;; The download location is derived from the CRAN name and VERSION.
       (uri (cran-uri "ape" version))
       (sha256
        (base32 "0959fiiy11rzfzrzaknmgrx64bhszj02l0ycz79k5a6bmpfzanlk"))))
    (build-system r-build-system)
    ;; R libraries that are propagated to whoever installs r-ape.
    (propagated-inputs
     (list (list "r-lattice" r-lattice)
           (list "r-nlme" r-nlme)))
    (home-page "http://ape-package.ird.fr/")
    (synopsis "Analyses of phylogenetics and evolution")
    (description
     "This package provides functions for reading, writing, plotting, and
manipulating phylogenetic trees, analyses of comparative data in a
phylogenetic framework, ancestral character analyses, analyses of
diversification and macroevolution, computing distances from DNA sequences,
and several other tools.")
    (license license:gpl2+)))
123
;; ARAGORN is shipped as a single C file named after the version; it is
;; compiled by hand and the binary and man page are installed manually.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is dropped; 'build' below invokes the
         ;; compiler directly.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The phase succeeds only when gcc exits with status 0.
             (let ((status (system* "gcc"
                                    "-O3"
                                    "-ffast-math"
                                    "-finline-functions"
                                    "-o"
                                    "aragorn"
                                    (string-append "aragorn" ,version ".c"))))
               (zero? status))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (bindir  (string-append prefix "/bin"))
                    (man1dir (string-append prefix "/share/man/man1")))
               (mkdir-p bindir)
               (install-file "aragorn" bindir)
               (mkdir-p man1dir)
               (install-file "aragorn.1" man1dir))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
169
;; BamM: Python 2 wrapper around a C library.  The bundled htslib is removed
;; from the source and the packaged htslib is passed to configure instead.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source
     (origin
       (method url-fetch)
       ;; BamM is not available on pypi.
       (uri (string-append "https://github.com/Ecogenomics/BamM/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "c/htslib-1.3.1")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((hts-prefix (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib"
               (string-append hts-prefix "/lib")
               "--with-libhts-inc"
               (string-append hts-prefix "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((shell (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") shell))
                 (setenv "CONFIG_SHELL" shell)
                 (substitute* "configure" (("/bin/sh") shell))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Prefix the wrapper's PATH with the PATH of the build
             ;; environment.
             (let ((program (string-append (assoc-ref outputs "out")
                                           "/bin/bamm"))
                   (search-path (getenv "PATH")))
               (wrap-program program
                 (list "PATH" ":" 'prefix (list search-path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Characters 1-3 of the last five characters of the
                    ;; "python" input path are used as the version directory.
                    (python-dir-suffix
                     (string-take
                      (string-take-right (assoc-ref inputs "python") 5)
                      3)))
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               (setenv "PYTHONPATH"
                       (string-append out
                                      "/lib/python"
                                      python-dir-suffix
                                      "/site-packages:"
                                      (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (zero? (system* "nosetests"))))))))
    (native-inputs
     (list (list "autoconf" autoconf)
           (list "automake" automake)
           (list "libtool" libtool)
           (list "zlib" zlib)
           (list "python-nose" python2-nose)
           (list "python-pysam" python2-pysam)))
    (inputs
     (list (list "htslib" htslib)
           (list "samtools" samtools)
           (list "bwa" bwa)
           (list "grep" grep)
           (list "sed" sed)
           (list "coreutils" coreutils)))
    (propagated-inputs
     (list (list "python-numpy" python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
261
;; BamTools: C++ API and command-line toolkit for BAM files, built with
;; CMake.  LDFLAGS is set before configuring so that an rpath pointing at
;; the output's lib/bamtools directory is passed to the linker.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.4.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0jr024kcrhjb82cm69i7p5fcg5375zlc1h3qh2n1v368hcd0qflk"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' returns an unspecified value; report success
             ;; explicitly, as the other phases in this file do.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
294
;; BCFtools: utilities for VCF/BCF files.  The bundled htslib is removed
;; from the source; all htslib paths are passed to make from the "htslib"
;; input instead.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/bcftools/releases/download/"
                    version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "095ry68vmz9q5s1scjsa698dhgyvgw5aicz24c19iwfbai07mhqj"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "htslib-1.3.1")
                  ;; Signal success explicitly, like the other snippets in
                  ;; this file.
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       ;; Look up the htslib input once; every HTS-related make variable is
       ;; derived from it.
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list
          "USE_GPL=1"
          (string-append "prefix=" (assoc-ref %outputs "out"))
          (string-append "HTSDIR=" htslib "/include")
          (string-append "HTSLIB=" htslib "/lib/libhts.a")
          (string-append "BGZIP=" htslib "/bin/bgzip")
          (string-append "TABIX=" htslib "/bin/tabix")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-Makefile
           (lambda _
             (substitute* "Makefile"
               ;; Do not attempt to build htslib.
               (("^include \\$\\(HTSDIR\\)/htslib\\.mk") "")
               ;; Link against GSL cblas.
               (("-lcblas") "-lgslcblas"))
             #t))
         (delete 'configure)
         (add-before 'check 'patch-tests
           (lambda _
             ;; Replace the hard-coded /bin/bash with the bash found in PATH.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
352
;; BEDOPS: genomic feature operations.  The bundled third-party tarballs are
;; unpacked by hand right after 'unpack', and the Makefile is patched so it
;; does not unpack them again.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; The phase fails as soon as one of the archives cannot be
             ;; extracted; the Makefile is only patched when all three
             ;; extractions succeeded.
             (and
              ;; Unpack the tarballs to benefit from shebang patching.
              (with-directory-excursion "third-party"
                (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                     (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                     (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
              (begin
                ;; Disable unpacking of tarballs in Makefile.
                (substitute* "system.mk/Makefile.linux"
                  (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                  (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                (substitute* "third-party/zlib-1.2.7/Makefile.in"
                  (("^SHELL=.*$") "SHELL=bash\n"))
                ;; 'substitute*' returns an unspecified value; report
                ;; success explicitly.
                #t))))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
410
;; bedtools: there is no configure step, and installation consists of
;; copying every file found under the build tree's "bin" directory.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "python" python-2)))
    (inputs
     (list (list "samtools" samtools)
           (list "zlib" zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target-dir (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program target-dir))
                         (find-files "bin" ".*")))
             #t)))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
449
450 ;; Later releases of bedtools produce files with more columns than
451 ;; what Ribotaper expects.
;; Variant of the bedtools package pinned to release 2.18.0; everything but
;; the version and source is inherited.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
464
;; RiboTaper pipeline, built against the pinned bedtools 2.18 and
;; samtools 0.1 packages.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     (list (list "bedtools" bedtools-2.18)
           (list "samtools" samtools-0.1)
           (list "r-minimal" r-minimal)
           (list "r-foreach" r-foreach)
           (list "r-xnomial" r-xnomial)
           (list "r-domc" r-domc)
           (list "r-multitaper" r-multitaper)
           (list "r-seqinr" r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
494
;; RiboDiff, a Python 2 package.  setup.py is patched so that
;; scripts/TE.py is listed as an installable script.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ratschlab/RiboDiff/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Keep the matched "packages=" line and append a "scripts="
             ;; line that reuses the same leading text.
             (substitute* "setup.py"
               (("^(.*)packages=.*" matched leading)
                (string-append matched "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (inputs
     (list (list "python-numpy" python2-numpy)
           (list "python-matplotlib" python2-matplotlib)
           (list "python-scipy" python2-scipy)
           (list "python-statsmodels" python2-statsmodels)))
    (native-inputs
     (list (list "python-mock" python2-mock)
           (list "python-nose" python2-nose)))
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
538
;; Bioawk: awk with bioinformatics extensions.  There is no configure script
;; and no tests; the man page and binary are installed by hand.
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (mkdir-p man)
               ;; The upstream man page is named awk.1; install it under the
               ;; program's own name.
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin))
             ;; 'install-file' returns an unspecified value; report success
             ;; explicitly, as the other install phases in this file do.
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
579
;; pybedtools, a Python 2 wrapper for the BEDtools programs.  bedtools and
;; samtools are propagated inputs.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Build the PyPI download location with 'pypi-uri', as the
              ;; other PyPI-hosted packages in this file do.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
611
;; BIOM format utilities.  The Python 2 variant is declared through the
;; 'python2-variant' property and defined separately.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.5")
    (source
     (origin
       (method url-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1n25w3p1rixbpac8iysmzcja6m4ip5r6sz19l8y6wlwi49hxn278"))))
    (build-system python-build-system)
    (propagated-inputs
     (list (list "python-numpy" python-numpy)
           (list "python-scipy" python-scipy)
           (list "python-future" python-future)
           (list "python-click" python-click)
           (list "python-h5py" python-h5py)))
    (native-inputs
     (list (list "python-nose" python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The variant is wrapped in a promise, so python2-biom-format is only
    ;; looked up when the promise is forced.
    (properties
     (list (cons 'python2-variant (delay python2-biom-format))))))
645
;; Python 2 variant of python-biom-format: the generic Python 2 conversion
;; plus an extra phase that patches setup.py.
(define-public python2-biom-format
  (let ((parent (package-with-python2
                 (strip-python2-variant python-biom-format))))
    (package
      (inherit parent)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ;; The parent's own arguments follow the #:phases override.
         ,@(package-arguments parent))))))
660
;; BioPerl with a minimal set of inputs.  The names of all transitive target
;; inputs are computed on the host side and spliced into the build code, where
;; they are used to set PERL5LIB for the installed ".pl" scripts.
(define-public bioperl-minimal
  (let* ((direct-inputs
          (list (list "perl-module-build" perl-module-build)
                (list "perl-data-stag" perl-data-stag)
                (list "perl-libwww" perl-libwww)
                (list "perl-uri" perl-uri)))
         ;; Package names of the de-duplicated transitive target inputs of
         ;; every direct input.
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs cadr)
                            direct-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; The release tag uses dashes where the version has dots.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (ch)
                                           (if (char=? ch #\.) #\- ch))
                                         version)
                             ".tar.gz"))
         (sha256
          (base32 "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin/"))
                      (input-dirs (map (lambda (input-name)
                                         (assoc-ref %build-inputs input-name))
                                       ',transitive-input-names))
                      (perl5lib
                       (string-join
                        (cons (string-append out "/lib/perl5/site_perl")
                              input-dirs)
                        ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               (list "PERL5LIB" ":" 'prefix (list perl5lib))))
                           (find-files bindir "\\.pl$"))
                 #t))))))
      (inputs direct-inputs)
      (native-inputs
       (list (list "perl-test-most" perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
725
;; Biopython, fetched from PyPI.  HOME is pointed at /tmp before the test
;; suite runs.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.68")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "07qc7nz0k77y8hf8s18rscvibvm91zw0kkq7ylrhisf8vp8hkp6i"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     (list (list "python-numpy" python-numpy)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
756
;; Python 2 build of python-biopython, derived with the generic conversion
;; procedure.
(define-public python2-biopython (package-with-python2 python-biopython))
759
760 ;; An outdated version of biopython is required for seqmagick, see
761 ;; https://github.com/fhcrc/seqmagick/issues/59
762 ;; When that issue has been resolved this package should be removed.
;; Non-public variant of python2-biopython pinned to release 1.66; only the
;; version and source differ from the inherited package.
(define python2-biopython-1.66
  (package
    (inherit python2-biopython)
    (version "1.66")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
773
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
         (revision "1")
         ;; The version embeds the revision and the first seven characters
         ;; of the commit hash.
         (snapshot (string-append "2.2.0-" revision "."
                                  (string-take commit 7))))
    (package
      (name "bpp-core")
      (version snapshot)
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of the
      ;; bpp packages with GCC 5.
      (inputs
       (list (list "gcc" gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
806
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
         (revision "1")
         ;; The version embeds the revision and the first seven characters
         ;; of the commit hash.
         (snapshot (string-append "2.2.0-" revision "."
                                  (string-take commit 7))))
    (package
      (name "bpp-phyl")
      (version snapshot)
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       (list (list "bpp-core" bpp-core)
             (list "bpp-seq" bpp-seq)
             ;; GCC 4.8 fails due to an 'internal compiler error', so we use
             ;; a more modern GCC.
             (list "gcc" gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
843
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
         (revision "1")
         ;; The version embeds the revision and the first seven characters
         ;; of the commit hash.
         (snapshot (string-append "2.2.0-" revision "."
                                  (string-take commit 7))))
    (package
      (name "bpp-popgen")
      (version snapshot)
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       (list (list "bpp-core" bpp-core)
             (list "bpp-seq" bpp-seq)
             (list "gcc" gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
876
(define-public bpp-seq
  ;; Upstream has not tagged a release since 2014; the recommended way to
  ;; install from source is a clone of the git repository, so a fixed commit
  ;; is packaged here.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((revision "1")
        (commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
    (package
      (name "bpp-seq")
      (version (string-append "2.2.0-" revision "."
                              (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (inputs
       `(("bpp-core" ,bpp-core)
         ;; Same compiler as 'bpp-core'.
         ("gcc" ,gcc-5)))
      (arguments
       '(#:parallel-build? #f
         ;; Build in the source tree: with an out-of-source build the test
         ;; data is not copied into the build directory and the tests fail.
         #:out-of-source? #f))
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (home-page "http://biopp.univ-montp2.fr")
      (license license:cecill-c))))
910
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Every input needs its own label: labels are the keys used to look
       ;; inputs up, so a duplicated label hides the second entry.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
949
(define-public blast+
  (package
    (name "blast+")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled bzip2 and zlib sources as well as the useless
           ;; msbuild directory.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/build-system/project_tree_builder/msbuild"))
           ;; Stop the Makefile from descending into the removed
           ;; compression directories.
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           #t))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;  19 MB
               "lib"                    ; 203 MB
               "include"))              ;  32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (arguments
     `(;; This massive library has only three(!) tests, and every one of them
       ;; fails with "unparsable timing stats":
       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; The build system lives in the "c++" subdirectory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "c++") #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the tool named TOOL, or #f
             ;; (after printing a warning) when it cannot be found in PATH.
             (define (resolve-tool tool)
               (if (string=? tool "date")
                   ;; Pin the timestamp so that calls to "date" are
                   ;; deterministic.
                   "date -d @0"
                   (or (which tool)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 tool)
                         #f))))

             ;; Files in which hardcoded tool locations get rewritten.
             (define build-system-files
               '("src/build-system/configure.ac"
                 "src/build-system/configure"
                 "scripts/common/impl/if_diff.sh"
                 "scripts/common/impl/run_with_lock.sh"
                 "src/build-system/Makefile.configurables.real"
                 "src/build-system/Makefile.in.top"
                 "src/build-system/Makefile.meta.gmake=no"
                 "src/build-system/Makefile.meta.in"
                 "src/build-system/Makefile.meta_l"
                 "src/build-system/Makefile.meta_p"
                 "src/build-system/Makefile.meta_r"
                 "src/build-system/Makefile.mk.in"
                 "src/build-system/Makefile.requirements"
                 "src/build-system/Makefile.rules_with_autodep.in"))

             ;; Replace "/bin/foo" and "/usr/bin/foo" with the resolved
             ;; tool; keep the original text when the tool is unknown.
             (substitute* (append build-system-files
                                  (find-files "scripts/common/check" "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole dir tool)
                (or (resolve-tool tool) whole)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; The check script must use "/tmp" instead of "/var/tmp".
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; Keep the helper scripts from resetting PATH.
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         ;; At some point during the configure phase $HOME has to be set.
         (add-before 'configure 'set-HOME
           (lambda _ (setenv "HOME" "/tmp") #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (libdir (string-append (assoc-ref outputs "lib")
                                           "/lib"))
                    (includedir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" prefix)
                           (string-append "--libdir=" libdir)
                           (string-append "--includedir=" includedir)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           ;; By default every library is built twice: once
                           ;; with "-static" in its name and once without.
                           "--without-static"
                           "--with-dll")))
               ;; Things like '--enable-fast-install' are not recognized by
               ;; the 'configure' script, so a hand-picked flag list is used.
               (zero? (apply system* "./configure.orig" flags))))))))
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    (home-page "http://blast.ncbi.nlm.nih.gov")
    ;; The bulk of the sources is in the public domain.  The exceptions are:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1096
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz was deleted by the source snippet, so the
         ;; Makefile must not try to build it.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hardcode absolute file names for the helper programs: kmc is
         ;; installed into this package's own output, pigz comes from the
         ;; "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Install the "bless" and "kmc" executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1181
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.9")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vp5db8i7is57iwjybcdg18f5ivyzlj5g1ix1nlvxainzivhz55g"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Run the upstream test script against the freshly built binaries.
         ;; The exit status must be turned into a boolean: every integer,
         ;; including a non-zero status, counts as true in Scheme, so
         ;; returning the raw status would hide test failures.
         (replace 'check
           (lambda _
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1235
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32
         "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
       (patches (search-patches "tophat-build-with-later-seqan.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The bundled copies of SeqAn and samtools are not wanted.
           (for-each delete-file-recursively
                     '("src/SeqAn-1.3"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; The bundled samtools was removed by the source snippet; point
         ;; the build system and the sources at the samtools found in PATH.
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Strip every mention of the bundled samtools library and
             ;; program from the Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ lhs) lhs)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Invoke the samtools executable by its absolute file name.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Quoted includes of the samtools headers ...
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             ;; ... and angle-bracket includes are both redirected to the
             ;; "samtools/" header directory.
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    ;; The license is the Boost Software License, Version 1.0; see
    ;; https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1302
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the executable, the README and the manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               ;; Report success explicitly rather than relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1349
(define-public bwa-pssm
  ;; Derived from the 'bwa' package: every field not overridden below
  ;; is inherited from it.
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1378
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; "setup.py" must not depend on the outdated "distribute" module.
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (arguments
     `(;; The test data are not part of the tarball, so the tests fail.
       #:tests? #f
       #:python ,python-2))
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (license license:expat)))
1412
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.10.0")
    (source
     (origin
       (method url-fetch)
       ;; The PyPi tarball lacks the test data, hence the GitHub archive.
       (uri (string-append
             "https://github.com/pysam-developers/pysam/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1mmvn91agr238kwz7226xq0i7k84lg2nxywn9712mzj7gvgqhfy8"))
       (modules '((guix build utils)))
       (snippet
        ;; The bundled htslib goes away.  TODO: do the same for samtools
        ;; and bcftools.
        '(delete-file-recursively "htslib"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; The remaining dependencies are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("htslib" ,htslib)))             ; Included from installed header files.
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         ;; Tell the build to use the "htslib" input rather than a bundled
         ;; copy, and to link against ncurses.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR"
                       (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1")
               #t)))
         (replace 'check
           (lambda _
             ;; PYTHONPATH gets extended with the first non-hidden
             ;; subdirectory of the "build" directory.
             (let* ((visible? (negate (cut string-prefix? "." <>)))
                    (build-subdir (car (scandir "build" visible?))))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (getcwd) "/build/"
                                      build-subdir)))
             ;; Run from "tests" so that python does not import from the
             ;; source directory.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               (and (zero? (system* "make" "-C" "pysam_data"))
                    (zero? (system* "make" "-C" "cbcf_data"))
                    (zero? (system* "nosetests" "-v"
                                    "--processes"
                                    (number->string
                                     (parallel-job-count)))))))))))
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (home-page "https://github.com/pysam-developers/pysam")
    (license license:expat)))
1484
;; Python 2 variant of 'python-pysam'.
(define-public python2-pysam (package-with-python2 python-pysam))
1487
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "twobitreader" version))
       (sha256
        (base32
         "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (arguments
     '(;; The PyPi release comes without the tests.
       ;; TODO Either build from the Git repository or ask the upstream
       ;; maintainer to distribute the tests on PyPi.
       #:tests? #f))
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (home-page "https://github.com/benjschiller/twobitreader")
    (license license:artistic2.0)))
1512
;; Python 2 variant of 'python-twobitreader'.
(define-public python2-twobitreader (package-with-python2 python-twobitreader))
1515
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32
         "1sqkz5d3b9kf688mp7k771c87ins42j7j0whmkb49cb3fsg8s8lj"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (arguments
     ;; Tests are disabled because some test files are not included.
     '(#:tests? #f))
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (home-page "https://github.com/joshuagryphon/plastid")
    (license license:bsd-3)))
1548
;; Python 2 variant of 'python-plastid'.
(define-public python2-plastid (package-with-python2 python-plastid))
1551
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version "-2016-0711.tar.gz"))
       (sha256
        (base32
         "1w8hd4fszgg29nqiz569fldwy012la77nljcmlhglgicws56z54p"))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)))
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; PREFIX is where the executables are copied to directly, so it
       ;; has to name the "bin" directory itself.
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Strip build date and time from the sources; they are sources of
         ;; non-determinism.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The target directory is not created by the "install" target.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               #t))))))
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    (home-page "http://weizhongli-lab.org/cd-hit/")
    ;; Quoting the manual: "It can be copied under the GNU General Public
    ;; License version 2 (GPLv2)."
    (license license:gpl2)))
1599
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The setup-time dependency is unnecessary.
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           ;; Delete the shipped shared objects and the "dist/" directory.
           (for-each delete-file
                     '("clipper/src/peaks.so"
                       "clipper/src/readsToWiggle.so"))
           (delete-file-recursively "dist/")
           #t))))
    (build-system python-build-system)
    (native-inputs
     ;; All of these are needed for the tests.
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    ;; Python 2 is the only supported version.
    (arguments `(#:python ,python-2))
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (home-page "https://github.com/YeoLab/clipper")
    (license license:gpl2)))
1645
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32
         "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    ;; Python 2 is the only supported version.
    (native-inputs `(("python" ,python-2)))
    (inputs `(("openmpi" ,openmpi)))
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the documentation, the "QuarryFiles" data directory and the
         ;; two programs into the output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix
                                           "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" docdir)
               (copy-recursively "QuarryFiles"
                                 (string-append prefix "/QuarryFiles"))
               (install-file "CodingQuarry" bindir)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py"
                             bindir)))))))
    ;; QUARRY_PATH is searched for in the "QuarryFiles" directory.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
1685
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir bindir)
               ;; Append the output directory, which now holds "src", to
               ;; the script's module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The installed 'couger' must run with the PYTHONPATH of the
             ;; build environment.
             (let* ((prefix (assoc-ref outputs "out"))
                    (pythonpath (getenv "PYTHONPATH")))
               (wrap-program (string-append prefix "/bin/couger")
                 `("PYTHONPATH" ":" prefix (,pythonpath))))
             #t)))))
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (home-page "http://couger.oit.duke.edu")
    (license license:gpl3+)))
1756
;; Clustal-Omega: built with the stock GNU build system, no custom phases.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs
     (list (list "argtable" argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
1780
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
              ;; This patch has been sent upstream already and is available
              ;; for download from Sourceforge, but it has not been merged.
              (patches (search-patches "crossmap-allow-system-pysam.patch"))
              (modules '((guix build utils)))
              ;; Remove the bundled copy of pysam.  Return #t explicitly
              ;; instead of relying on the unspecified value of
              ;; 'delete-file-recursively'.
              (snippet
               '(begin
                  (delete-file-recursively "lib/pysam")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set the environment variable that (per its name) makes the
         ;; build use the system pysam instead of the deleted bundled copy.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1821
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://cole-trapnell-lab.github.io/"
                                  "cufflinks/assets/downloads/cufflinks-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Point the configure script at the samtools input.
       (list (string-append "--with-bam="
                            (assoc-ref %build-inputs "samtools")))
       #:make-flags
       (let ((eigen (assoc-ref %build-inputs "eigen")))
         (list
          ;; The includes for "eigen" are located in a subdirectory.
          (string-append "EIGEN_CPPFLAGS=-I" eigen "/include/eigen3/")
          ;; Cufflinks must be linked with various boost libraries.
          "LDFLAGS=-lboost_system -lboost_serialization -lboost_thread"))
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the <bam/...> include paths to <samtools/...> in the
         ;; autoconf macro, the generated configure script and the source.
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             (substitute* (list "ax_bam.m4" "configure" "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (license license:boost1.0)))
1879
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.12")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "19smhh6444ikn4jlmyhvffw4m5aw7yg07rqsk7arg8dkwyga1i4v"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests must be run after installation.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Derive "MAJOR.MINOR" from the version that ends the file
             ;; name of the "python" input.  Splitting on the dots, rather
             ;; than taking a fixed number of characters, keeps this
             ;; correct for versions such as "3.5.10" or "3.10.1".
             (let* ((python (assoc-ref inputs "python"))
                    (version (substring python
                                        (+ 1 (string-rindex python #\-))))
                    (first-dot (string-index version #\.))
                    (second-dot (and first-dot
                                     (string-index version #\.
                                                   (+ 1 first-dot))))
                    (major+minor (if second-dot
                                     (substring version 0 second-dot)
                                     version)))
               ;; Make the freshly installed modules visible to the tests.
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (assoc-ref outputs "out")
                                      "/lib/python" major+minor
                                      "/site-packages"))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (inputs
     `(("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1921
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/dpryan79/libBigWig/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure step; settings are passed as make flags.
         (delete 'configure)
         ;; Strip the "testRemote" invocation from the Makefile before the
         ;; tests run (presumably because it needs network access).
         (add-before 'check 'disable-curl-test
           (lambda _
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
         ;; there has not yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (dir)
                           (mkdir-p (string-append out dir)))
                         '("/lib" "/include"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
1967
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyBigWig" version))
              (sha256
               (base32
                "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
              (modules '((guix build utils)))
              ;; The bundled libBigWig sources are deleted; the library is
              ;; taken from the "libbigwig" input instead.
              (snippet
               '(begin
                  (delete-file-recursively "libBigWig")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Prepend "BigWig" to the list of libraries in setup.py.
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))

;; Python 2 variant of the package above.
(define-public python2-pybigwig
  (package-with-python2 python-pybigwig))
2005
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.2.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "DendroPy" version))
              (sha256
               (base32
                "15c7s3d5gf19ljsxvq5advaa752wfi7pwrdjyhzmg85hccyvp47p"))
              (patches
               (search-patches "python-dendropy-fix-tests.patch"))))
    (build-system python-build-system)
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)
    ;; The Python 2 variant is a separate, customized definition; the
    ;; reference is delayed because that definition refers back to this one.
    (properties
     (list (cons 'python2-variant (delay python2-dendropy))))))
2027
(define-public python2-dendropy
  ;; Start from the automatically derived Python 2 package, then override
  ;; the test phase and add the test runner it needs.
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-dendropy))))
    (package
      (inherit py2-base)
      (native-inputs
       (cons `("python2-nose" ,python2-nose)
             (package-native-inputs py2-base)))
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           (replace 'check
             ;; There is currently a test failure that only happens on some
             ;; systems, and only using "setup.py test"
             (lambda _
               (zero? (system* "nosetests"))))))))))
2042
2043
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1nmfin0zjdby3vay3r4flvz94dr6qjhj41ax4yz3vx13j6wz8izd"))))
    (build-system python-build-system)
    ;; Built with Python 2; every Python input below is the python2-
    ;; variant of the respective package.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     ;; These three are only needed for the tests.
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-numpydoc" ,python2-numpydoc)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)
       ("python-pybigwig" ,python2-pybigwig)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2083
(define-public diamond
  (package
    (name "diamond")
    (version "0.8.37")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1zn7q8m41ayfnjvf9snrsnq00mm68alf9rhdadx5q1sk23lyvp2l"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip "-march=native" from the CMake build description.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))
       ;; There is no "check" target.
       #:tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    ;; diamond fails to build on other platforms
    ;; https://github.com/bbuchfink/diamond/issues/18
    (supported-systems '("x86_64-linux"))
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
2121
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maaskola/discrover/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; Insert "#include <random>" right after the include guard of
         ;; each listed header.
         (add-after 'unpack 'add-missing-includes
           (lambda _
             (for-each
              (lambda (header+guard)
                (substitute* (car header+guard)
                  (((cdr header+guard) line)
                   (string-append line "\n#include <random>"))))
              '(("src/executioninformation.hpp"
                 . "#define EXECUTIONINFORMATION_HPP")
                ("src/plasma/fasta.hpp"
                 . "#define FASTA_HPP")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
2160
(define-public eigensoft
  ;; Packaged from a git commit; the version string carries the revision
  ;; and the first nine characters of the commit id.
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-"
                              revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32
           "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Remove pre-built binaries, but keep an empty "bin" directory.
         (snippet '(begin
                     (delete-file-recursively "bin")
                     (mkdir "bin")
                     #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags '("CC=gcc")
         #:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin"))
                     (executables (find-files "../bin" ".*")))
                 (for-each (lambda (executable)
                             (install-file executable target))
                           executables)
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
2228
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                                  "versions/2016-05-03/edirect.tar.gz"))
              (sha256
               (base32
                "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Installation consists of copying the single script.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               ;; 'install-file' creates TARGET when it does not exist.
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path))))
             ;; Report success explicitly, like the other phases do.
             #t)))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2293
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
                    "exonerate-" version ".tar.gz"))
              (sha256
               (base32
                "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     ;; Building in parallel fails on some machines.
     `(#:parallel-build? #f))
    (inputs
     (list (list "glib" glib)))
    (native-inputs
     (list (list "pkg-config" pkg-config)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using a many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2323
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri
               (string-append
                "http://bio.math.berkeley.edu/eXpress/downloads/express-"
                version "/express-" version "-src.tgz"))
              (sha256
               (base32
                "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Switch off static Boost linking and replace the references to
         ;; an in-tree "bamtools" directory with the "bamtools" input.
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2368
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The build and the check run from within "source"; the
         ;; top-level directory is re-entered before installation.
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         ;; Copy the helper script and the binary into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (for-each (lambda (file)
                           (install-file file target))
                         '("scripts/convertToEBD.py"
                           "bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2411
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0ljvvw8i1als1wbfzvrf15c3ii2vw9db20a259g6pzg34xyyb97k"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; The source is a single C file, so there is nothing to unpack
         ;; or configure; it is handed to the compiler directly.
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into TARGET with the common optimization
             ;; flags, preceded by any EXTRA-FLAGS.
             (let ((compile
                    (lambda (target . extra-flags)
                      (zero? (apply system* "gcc"
                                    (append extra-flags
                                            (list "-O3"
                                                  "-finline-functions"
                                                  "-funroll-loops"
                                                  "-Wall"
                                                  "-o"
                                                  target
                                                  source
                                                  "-lm")))))))
               ;; Build the plain executable, then the OpenMP one.
               (and (compile "FastTree")
                    (compile "FastTreeMP" "-DOPENMP" "-fopenmp")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (for-each (lambda (program)
                           (install-file program target))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2468
;; FASTX-Toolkit: built with the stock GNU build system, no custom phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "pkg-config" pkg-config)))
    (inputs
     (list (list "libgtextutils" libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
2500
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     ;; FLEXBAR_BINARY_DIR points into the output and the 'install' phase
     ;; is removed, so the validation script below finds the executable
     ;; through PATH.
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The directory name follows the package version, so derive
             ;; it from VERSION instead of hard-coding it.
             (chdir ,(string-append "../flexbar_v" version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2545
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             ;; SHARE ends with a slash, so sub-directories are appended
             ;; to it without a leading slash.
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to 'rm' and 'mv' by their absolute file names.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" share "\";"))))
             #t))
         (replace 'build
           (lambda _ (and (zero? (system* "make" "clean"))
                          (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (install-file "FGS_gff.py" bin)
               (install-file "post_process.pl" bin)
               (copy-recursively "train" share)
               ;; Report success explicitly, like 'patch-paths' does.
               #t)))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check
           ;; that the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (and (zero? (system* (string-append bin "run_FragGeneScan.pl")
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2626
(define-public fxtract
  ;; Commit of ctSkennerton's "util" sources; they are fetched separately
  ;; (see native-inputs) and copied into the "util" directory before building.
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the downloaded release tarball after this package and its
         ;; version, so it is not confused with the separate "util" checkout.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               ;; 'rmdir' only succeeds on an empty directory, so this
               ;; swaps the empty "util" directory for the fetched sources.
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2693
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/nboley/grit/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       ;; Use 'modify-phases' like the other packages in this file rather
       ;; than the older 'alist-cons-after' procedure.
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Delete these C files to force fresh generation from pyx sources.
             (delete-file "grit/sparsify_support_fns.c")
             (delete-file "grit/call_peaks_support_fns.c")
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build")
               ;; Add numpy include path to fix compilation
               (("pyx\", \\]")
                (string-append "pyx\", ], include_dirs = ['"
                               (assoc-ref inputs "python-numpy")
                               "/lib/python2.7/site-packages/numpy/core/include/"
                               "']")))
             #t)))))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (native-inputs
     ;; Cython regenerates the C sources deleted in the phase above.
     `(("python-cython" ,python2-cython)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2743
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       ;; Use 'modify-phases' like the other packages in this file rather
       ;; than the older 'alist-*' procedures; each phase returns #t
       ;; explicitly on success.
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             ;; Point the helper scripts at the 'env' found on PATH.
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install every file whose name matches the hisat program
             ;; pattern into the output's bin directory.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
               #t))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2809
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       ;; FIXME: a better source URL is
       ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
       ;;                "/downloads/hisat2-" version "-source.zip")
       ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
       ;; but it is currently unavailable.
       (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (arguments
     `(#:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))       ; provides 'cut' used below
       #:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Replace the `date` invocation in the Makefile with a
             ;; constant.
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    ;; Files whose names match the hisat2 program pattern.
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bindir) programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir))
             #t)))))
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later.  The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
2872
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is derived from the first component of
       ;; the version string.
       (uri (string-append
             "http://eddylab.org/software/hmmer"
             (version-prefix version 1) "/"
             version "/hmmer-" version ".tar.gz"))
       (patches (search-patches "hmmer-remove-cpu-specificity.patch"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (home-page "http://hmmer.org/")
    (license (list license:gpl3+
                   ;; The bundled library 'easel' is distributed
                   ;; under The Janelia Farm Software License.
                   (license:non-copyleft
                    "file://easel/LICENSE"
                    "See easel/LICENSE in the distribution.")))))
2903
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-pysam" ,python2-pysam)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (license license:gpl3+)))
2929
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; Remove the build dependency on git by flipping the
              ;; "failifexecutionfails" attribute in build.xml to "false".
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Internet access
       #:make-flags
       ;; Have the build place its "dist" files under the output.
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
2967
(define-public htslib
  (package
    (name "htslib")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; Make the test driver use the bash found on PATH instead of
             ;; the hard-coded /bin/bash.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    (home-page "http://www.htslib.org")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
3002
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/nboley/idr/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    ;; FIXME: The tests fail with
    ;; "ImportError: No module named 'utility'", so they are disabled.
    (arguments
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (home-page "https://github.com/nboley/idr")
    (license license:gpl3+)))
3033
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (arguments
     `(#:configure-flags
       ;; One "--enable-LANG-binding=DIR" flag per language binding, each
       ;; pointing at the output of the same name.
       (map (lambda (binding)
              (string-append "--enable-" binding "-binding="
                             (assoc-ref %outputs binding)))
            '("ruby" "python"))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; From their website: JELLYFISH runs on 64-bit Intel-compatible processors
    (supported-systems '("x86_64-linux"))
    ;; The combined work is published under the GPLv3 or later.  Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
3084
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (patches (search-patches "khmer-use-libraries.patch"))
       (sha256
        (base32
         "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))))
    (build-system python-build-system)
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)
       ;; Tests fail when gcc-5 is used for compilation.  Use gcc-4.9 at least
       ;; until the next version of khmer (likely 2.1) is released.
       ("gcc" ,gcc-4.9)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Delete bundled libraries.
             (for-each delete-file-recursively
                       '("third-party/zlib" "third-party/bzip2"))
             ;; Replace bundled seqan with the headers of the seqan input.
             (let ((bundled "third-party/seqan")
                   (headers (string-append (assoc-ref inputs "seqan")
                                           "/include/seqan")))
               (delete-file-recursively bundled)
               (copy-recursively headers
                                 (string-append bundled
                                                "/core/include" "/seqan")))
             ;; We do not replace the bundled MurmurHash as the canonical
             ;; repository for this code 'SMHasher' is unsuitable for
             ;; providing a library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         ;; It is simpler to test after installation.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Last five characters of the python input's file
                    ;; name, cut down to the first three of those;
                    ;; presumably the MAJOR.MINOR version -- confirm.
                    (python-version
                     (string-take
                      (string-take-right (assoc-ref inputs "python") 5)
                      3)))
               ;; Make the installed scripts and modules visible to the
               ;; test run.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH") ":" out
                                      "/lib/python" python-version
                                      "/site-packages"))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux"))
    (license license:bsd-3)))
3168
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (source
     (origin
       (method url-fetch)
       ;; The project is published on PyPI under the name "MACS2".
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32
         "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (arguments
     `(#:tests? #f                      ; no test target
       #:python ,python-2))             ; only compatible with Python 2.7
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (home-page "https://github.com/taoliu/MACS/")
    (license license:bsd-3)))
3195
(define-public mafft
  (package
    (name "mafft")
    (version "7.310")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0gbsaz6z2qa307kd7wfb06c3y4ikmv1hsdvlns11f6zq4w1z9pwc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         ;; The build is driven from the "core" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must spell
               ;; out "f2cl" in full, exactly as the replacement does,
               ;; otherwise nothing matches and mafft-profile stays in.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; Comment out the matching 'rm' lines for the removed
               ;; programs.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace bare tool names with the file names found on PATH.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; No trailing ":" here: 'wrap-program' inserts the
                    ;; separator itself, and an extra one would add an empty
                    ;; PATH element, which means the current directory.
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
3281
(define-public mash
  (package
    (name "mash")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/marbl/mash/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete bundled kseq.
        ;; TODO: Also delete bundled murmurhash and open bloom filter.
        '(delete-file "src/mash/kseq.h"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application.  Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags (list "CC=gcc")
       #:configure-flags
       ;; Tell configure where the capnproto and gsl inputs live.
       (let ((input-directory (lambda (label)
                                (assoc-ref %build-inputs label))))
         (list (string-append "--with-capnp=" (input-directory "capnproto"))
               (string-append "--with-gsl=" (input-directory "gsl"))))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'autoconf
           (lambda _
             (zero? (system* "autoconf"))))
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; The bundled kseq.h is deleted by the source snippet, so
             ;; include the header under "htslib/" instead.
             (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t)))))
    (supported-systems '("x86_64-linux"))
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (home-page "https://mash.readthedocs.io")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3337
(define-public metabat
  ;; We package from a git commit because compilation of the released version
  ;; fails.
  (let ((commit "cbdca756993e66ae57e50a27970595dda9cbde1b"))
    (package
      (name "metabat")
      (version (string-append "0.32.4-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://bitbucket.org/berkeleylab/metabat.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (patches (search-patches "metabat-remove-compilation-date.patch"))
         (sha256
          (base32
           "0byia8nsip6zvc4ha0qkxkxxyjf4x7jcvy48q2dvb0pzr989syzr"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("scons" ,scons)))
      (inputs
       `(("zlib" ,zlib)
         ("perl" ,perl)
         ("samtools" ,samtools)
         ("htslib" ,htslib)
         ("boost" ,boost)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Check and install are carried out during build phase.
           (delete 'check)
           (delete 'install)
           (add-after 'unpack 'fix-includes
             (lambda _
               ;; Include the bam/sam headers from "samtools/" and kseq.h
               ;; from "htslib/" rather than from "bam/".
               (substitute* "src/KseqReader.h"
                 (("^#include \"bam/kseq\\.h\"")
                  "#include \"htslib/kseq.h\""))
               (substitute* "src/BamUtils.h"
                 (("^#include \"bam/bam\\.h\"")
                  "#include \"samtools/bam.h\"")
                 (("^#include \"bam/sam\\.h\"")
                  "#include \"samtools/sam.h\""))
               #t))
           (add-after 'unpack 'fix-scons
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((htslib (assoc-ref inputs "htslib"))
                     (samtools (assoc-ref inputs "samtools")))
                 (substitute* "SConstruct"
                   ;; NOTE(review): the replacement below spells the
                   ;; variable "hitslib_dir", and "samtools_dir" is pointed
                   ;; at the htslib input.  Both look like slips but are
                   ;; kept as-is; confirm against the SConstruct file
                   ;; before changing them.
                   (("^htslib_dir = 'samtools'")
                    (string-append "hitslib_dir = '" htslib "'"))
                   (("^samtools_dir = 'samtools'")
                    (string-append "samtools_dir = '" htslib "'"))
                   (("^findStaticOrShared\\('bam', hts_lib")
                    (string-append "findStaticOrShared('bam', '"
                                   samtools
                                   "/lib'"))
                   ;; Do not distribute README.
                   (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
               #t))
           (replace 'build
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out"))
                     (boost (assoc-ref inputs "boost")))
                 ;; The output directory is created before running the
                 ;; scons "install" target.
                 (mkdir out)
                 (zero? (system* "scons"
                                 (string-append "PREFIX=" out)
                                 (string-append "BOOST_ROOT=" boost)
                                 "install"))))))))
      (synopsis
       "Reconstruction of single genomes from complex microbial communities")
      (description
       "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
      (home-page "https://bitbucket.org/berkeleylab/metabat")
      (license (license:non-copyleft "file://license.txt"
                                     "See license.txt in the distribution.")))))
3423
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/ctSkennerton/minced/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Minced comes with a wrapper script that tries to figure out
             ;; where it is located before running the JAR.  Since these
             ;; paths are known to us, we build our own wrapper to avoid
             ;; coreutils dependency.
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (launcher (string-append bindir "/minced"))
                    (script (string-append
                             "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                             (assoc-ref inputs "jre") "/bin/java -jar "
                             bindir "/minced.jar \"$@\"\n")))
               (install-file "minced.jar" bindir)
               (call-with-output-file launcher
                 (lambda (port)
                   (display script port)))
               ;; Read and execute permission for everyone, no write.
               (chmod launcher #o555)))))))
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (home-page "https://github.com/ctSkennerton/minced")
    (license license:gpl3+)))
3479
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Use setuptools, or else the executables are not
           ;; installed.
           (("distutils.core") "setuptools")
           ;; use "gcc" instead of "cc" for compilation
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no "test" target
       #:python ,python-2))             ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)))  ;for tests
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (license license:gpl2)))
3528
(define-public muscle
  ;; Package definition for the MUSCLE aligner, built straight from the
  ;; upstream Makefile.
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         ;; No configure step is run for this package.
         (delete 'configure)
         (replace 'check
           ;; Upstream has no test suite; as a smoke test, make sure the
           ;; freshly built binary can at least print its version.
           (lambda _
             (zero? (system* "./muscle" "-version"))))
         (replace 'install
           ;; Copy the single executable into the output's bin directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
3562
(define-public newick-utils
  ;; Upstream has not made a release recently, so a fixed git commit is
  ;; packaged instead; the version string embeds the commit prefix.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Regenerate the build system right after unpacking the
           ;; checkout; the phase fails when autoreconf exits non-zero.
           (add-after 'unpack 'autoconf
             (lambda _
               (zero? (system* "autoreconf" "-vif")))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
3601
(define-public orfm
  ;; Package definition for OrfM, built from the upstream release tarball
  ;; with the standard GNU build phases.
  (package
    (name "orfm")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "19hwp13n82isdvk16710l9m35cmzf0q3fsrcn3r8c5r67biiz39s"))))
    (build-system gnu-build-system)
    ;; NOTE(review): the Ruby packages are presumably only needed by the
    ;; test suite -- confirm against upstream.
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
3626
(define-public python2-pbcore
  ;; Package definition for pbcore, fetched from PyPI and built for
  ;; Python 2 only.
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
3654
(define-public python2-warpedlmm
  ;; Package definition for WarpedLMM, fetched from PyPI as a zip archive
  ;; and built for Python 2 only.
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    ;; requires Python 2.7
    (arguments
     `(#:python ,python-2))
    (native-inputs
     ;; NOTE(review): unzip is presumably here because the source is a .zip
     ;; archive -- confirm.
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
3689
(define-public pbtranscript-tofu
  ;; Packaged from a fixed commit of the cDNA_primer repository; the
  ;; version string carries the first seven characters of that commit.
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; remove bundled Cython sources
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; The Python package lives in a sub-directory of the checkout,
           ;; so all later phases run from there.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; With setuptools version 18.0 and later this setup.py hack causes
           ;; a build error, so we disable it.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
3746
(define-public prank
  ;; Package definition for PRANK.  The installed executable is wrapped so
  ;; that the mafft, exonerate and bppsuite programs are on its PATH.
  (package
    (name "prank")
    (version "150803")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32
         "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; This phase is also inserted directly after 'unpack, which places
         ;; it *before* 'enter-src-dir; that is why the Makefile is still
         ;; addressed relative to the top of the source tree.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1"))
                    ;; Colon-separated list of the bin directories of the
                    ;; helper programs, in the order given below.
                    (path (string-join
                           (map (lambda (input)
                                  (string-append
                                   (assoc-ref %build-inputs input) "/bin"))
                                '("mafft" "exonerate" "bppsuite"))
                           ":")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
3804
(define-public proteinortho
  ;; Package definition for Proteinortho.  The Makefile is patched to
  ;; install into the output, and the main script is wrapped with the
  ;; build-time PATH.
  (package
    (name "proteinortho")
    (version "5.15")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
             version "_src.tar.gz"))
       (sha256
        (base32
         "05wacnnbx56avpcwhzlcf6b7s77swcpv3qnwz5sh1z54i51gg2ki"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; There is no configure script, so we modify the Makefile directly.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bin "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; The install directory is not created during 'make install'.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         (add-after 'install 'wrap-programs
           ;; Prefix PATH of the installed script with the PATH that is in
           ;; effect during the build.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((build-path (getenv "PATH"))
                   (script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl")))
               (wrap-program script
                 `("PATH" ":" prefix (,build-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
3856
(define-public pyicoteo
  ;; Package definition for Pyicoteo, built for Python 2 from the Bitbucket
  ;; tarball of the release tag.
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; does not work with Python 3
       #:tests? #f))      ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the text below must match the number of @item
    ;; entries in the enumeration (currently seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently
seven different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
3894
(define-public prodigal
  ;; Package definition for Prodigal.  There is no configure step; the
  ;; install location is handed to make through INSTALLDIR.
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/hyattpd/Prodigal/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no check target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
3925
(define-public roary
  ;; Package definition for Roary.  Nothing is compiled: the scripts and
  ;; Perl modules are copied into the output and every installed program is
  ;; wrapped so that it finds its Perl/R libraries and helper programs.
  (package
    (name "roary")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x2hpb3nfsc6x2nq1788w0fhqfzc7cn2dp4xwyva9m3k6xlz0m43"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Every test file is run (no short-circuit on the first
             ;; failure); the phase succeeds only when the list of failed
             ;; files is empty.
             (null? (filter (lambda (file)
                              (display file) (display "\n")
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every program whose name does not end in "R".  The
               ;; names come from the source tree's "bin" directory, so they
               ;; are of the form "bin/NAME" and are resolved against OUT.
               ;; Both variables are set by one wrapper instead of stacking
               ;; two wrappers on the same file.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script needs the R site library and
               ;; coreutils instead.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
4040
(define-public raxml
  ;; Package definition for RAxML, built from Makefile.HYBRID.gcc.  The
  ;; resulting "raxmlHPC-HYBRID" executable is installed together with a
  ;; shorter "raxml" alias.
  (package
    (name "raxml")
    (version "8.2.10")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Create the "raxml" alias inside BIN.  The second argument
               ;; of 'symlink' is the name of the link itself; a bare
               ;; "raxml" would be created in the build directory and never
               ;; reach the output.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    (license license:gpl2+)))
4080
(define-public rsem
  ;; Package definition for RSEM.  The bundled boost copy is removed from
  ;; the source, the bundled samtools library is not built, and the Perl
  ;; front-ends are wrapped so that they find rsem_perl_utils.pm.
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                       version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of boost
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           ;; Install every file matching "rsem-.*" found in the build tree
           ;; as a program, plus the shared Perl module.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin-dir (string-append out "/bin/"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin-dir)
               (mkdir-p perl-dir)
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perl-dir))
             #t))
         (add-after 'install 'wrap-program
           ;; Put the output's site_perl directory on PERL5LIB for the
           ;; programs listed below.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perl-dir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
4156
(define-public rseqc
  ;; Package definition for RSeQC, built for Python 2.  The bundled pysam
  ;; copy is deleted from the source and setup.py is patched accordingly.
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/rseqc/"
                       "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; remove dependency on outdated "distribute" module
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; do not use bundled copy of pysam
             (("^have_pysam = False") "have_pysam = True"))
           ;; The value of 'substitute*' is unspecified, so end the snippet
           ;; with an explicit success value.
           #t))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
4198
(define-public seek
  ;; Upstream publishes no release tarballs.  The installation instructions
  ;; at http://seek.princeton.edu/installation.jsp name this changeset ID as
  ;; the latest stable release, so that is what gets packaged.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (sha256
          (base32
           "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of "tools/" that are built and installed in
         ;; addition to what the top-level build and install phases do.
         (let ((dirs '("SeekMiner"
                       "SeekEvaluator"
                       "SeekPrep"
                       "Distancer"
                       "Data2DB"
                       "PCL2Bin")))
           (modify-phases %standard-phases
             (add-before 'configure 'bootstrap
               (lambda _
                 (zero? (system* "bash" "gen_auto"))))
             (add-after 'build 'build-additional-tools
               ;; Run make in each tool directory, stopping at the first
               ;; failure.
               (lambda* (#:key make-flags #:allow-other-keys)
                 (every (lambda (tool)
                          (with-directory-excursion
                              (string-append "tools/" tool)
                            (zero? (apply system* (cons "make" make-flags)))))
                        dirs)))
             (add-after 'install 'install-additional-tools
               ;; Run "make install" in each tool directory; once one of
               ;; them has failed the remaining ones are skipped and the
               ;; phase returns #f.
               (lambda* (#:key make-flags #:allow-other-keys)
                 (fold (lambda (tool ok?)
                         (with-directory-excursion
                             (string-append "tools/" tool)
                           (and ok?
                                (zero? (apply system* "make"
                                              (append make-flags
                                                      '("install")))))))
                       #t dirs)))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
4267
(define-public samtools
  ;; Package definition for samtools 1.x.  Besides the regular install, the
  ;; static library "libbam.a" and the header files are installed so that
  ;; other packages can build against them.
  (package
    (name "samtools")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "0znnnxc467jbf1as2dpskrjhfh8mbll760j6w6rdkwlwbqsp8gbc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags (list "--with-ncurses")
       #:phases
       ;; Expressed with 'modify-phases' rather than nested
       ;; 'alist-cons-after' calls.
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash")))
             #t))
         ;; Each 'add-after' inserts directly behind 'install, so listing
         ;; 'install-headers first keeps the resulting order
         ;; install -> install-library -> install-headers.
         (add-after 'install 'install-headers
           ;; Copy every "*.h" file of the top-level source directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((include (string-append (assoc-ref outputs "out")
                                           "/include/samtools/")))
               (for-each (lambda (file)
                           (install-file file include))
                         (scandir "." (lambda (name)
                                        (string-match "\\.h$" name))))
               #t)))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
               (install-file "libbam.a" lib)
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
4326
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and many
  ;; bioinformatics pipelines still depend on it.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     ;; Start from the arguments of the parent package and adjust the make
     ;; flags and phases.
     `(#:tests? #f ;no "check" target
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:make-flags flags)
            `(cons "LIBCURSES=-lncurses" ,flags))
           ((#:phases phases)
            `(modify-phases ,phases
               ;; Drop the inherited test patching and the configure step.
               (delete 'patch-tests)
               (delete 'configure)
               ;; Only the "samtools" executable is copied into bin.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let ((bin (string-append
                               (assoc-ref outputs "out") "/bin")))
                     (mkdir-p bin)
                     (install-file "samtools" bin)
                     #t))))))))))
4357
(define-public mosaik
  ;; Package definition for MOSAIK, built from a fixed git commit with the
  ;; Makefile found in the "src" directory.
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; There are no release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, just move into the source directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy the contents of "../bin" (relative to "src") into the
           ;; output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (mkdir-p bin)
                 (copy-recursively "../bin" bin)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
4404
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.3.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       ;; Use 'modify-phases' like the other packages in this file (and like
       ;; java-ngs, which extends these phases) rather than the legacy
       ;; 'alist-replace' / 'alist-cons-after' helpers.
       (modify-phases %standard-phases
         ;; The SDK lives in the "ngs-sdk" sub-directory of the source tree.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so call it by hand with only the
               ;; options it understands.  The phase succeeds when the
               ;; script exits with status zero.
               (zero? (system* "./configure"
                               (string-append "--build-prefix=" (getcwd) "/build")
                               (string-append "--prefix=" out)))))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
4450
(define-public java-ngs
  ;; Same source as ngs-sdk, but built from the "ngs-java" sub-directory.
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ;; Reuse the phases of ngs-sdk; only the directory entered after
       ;; unpacking differs.
       ((#:phases sdk-phases)
        `(modify-phases ,sdk-phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
4468
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Point the libmagic include path at our input instead of
               ;; /usr/include.
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Have the kdf5 library installed as well; sra-tools
               ;; needs it.
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Add "-msse2" to the C compiler flags.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Look for ngs-java in our input rather than in /usr/local.
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script rejects options such as
               ;; '--enable-fast-install', so it is invoked manually.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 they end up in
               ;; "linux/gcc/i386", hence the Linux architecture name
               ;; ("i386") is used here instead of the target system prefix
               ;; ("i686").
               (mkdir ilib)
               (copy-recursively
                (string-append "build/ncbi-vdb/linux/gcc/"
                               ,(system->linux-architecture
                                 (or (%current-target-system)
                                     (%current-system)))
                               "/rel/ilib")
                ilib)
               ;; Install interface headers.
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; sra-tools needs these configuration files.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((kfg (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir kfg)
               (for-each (lambda (file)
                           (install-file file kfg))
                         '("libs/kfg/default.kfg"
                           "libs/kfg/certs.kfg"))
               #t))))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
4570
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; An empty value turns off the web check, i.e. no phoning
               ;; home.
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Install the "plink" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Everything is GPLv2 except fisher.h, which is under LGPLv2.1+.
    (license (list license:gpl2 license:lgpl2.1+))))
4623
(define-public smithlab-cpp
  ;; Packaged from a fixed commit; the version string is derived from the
  ;; revision counter and the first seven characters of the commit.
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no "check" target
         #:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Refer to the samtools header through its "samtools/" prefix.
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               (substitute* '("SAM.cpp" "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           ;; Install all object files to "lib" and all .hpp headers to
           ;; "include/smithlab-cpp".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (libdir  (string-append out "/lib"))
                      (headers (string-append out "/include/smithlab-cpp")))
                 (mkdir-p libdir)
                 (mkdir-p headers)
                 (for-each (cut install-file <> libdir)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> headers)
                           (find-files "." "\\.hpp$"))
                 #t))))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
4676
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/"
                           "preseq/archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
       (modules '((guix build utils)))
       ;; Drop the bundled copy of samtools; the packaged one is used.
       (snippet
        '(delete-file-recursively "samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; Tell the Makefile where to install and where to find the samtools
       ;; and smithlab-cpp inputs.
       (let ((out      (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
4725
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite through nose with the current directory
         ;; appended to PYTHONPATH, leaving out tests tagged as
         ;; "known_failing".
         (replace 'check
           (lambda _
             (let ((python-path (getenv "PYTHONPATH")))
               (setenv "PYTHONPATH" (string-append python-path ":."))
               (zero? (system* "nosetests"
                               "--attr" "!known_failing"))))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
4756
;; Python 2 variant, derived mechanically from python-screed.
(define-public python2-screed
  (package-with-python2
   python-screed))
4759
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.8.2-1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/sra-tools/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:parallel-build? #f             ; not supported
       #:make-flags
       ;; Every flag points into the ncbi-vdb input: its certificate and
       ;; default configuration files and its library directory.
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               (string-append "VDB_LIBDIR=" vdb
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         ;; This version of sra-tools fails to build with glibc because of a
         ;; naming conflict: glibc-2.25/include/bits/mathcalls.h already
         ;; defines "canonicalize", so the local function is renamed.
         ;;
         ;; Upstream bug report:
         ;; https://github.com/ncbi/sra-tools/issues/67
         (add-after 'unpack 'patch-away-glibc-conflict
           (lambda _
             (substitute* "tools/bam-loader/bam.c"
               (("canonicalize\\(" line)
                (string-append "sra_tools_" line)))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (vdb (assoc-ref inputs "ncbi-vdb")))
               ;; The build system expects a directory holding the sources
               ;; and raw build output of ncbi-vdb, including files that are
               ;; not installed.  We build against an installed ncbi-vdb
               ;; instead, which requires the changes below.
               (substitute* "setup/konfigure.perl"
                 ;; Let the configure script look for the "ilib" directory
                 ;; of "ncbi-vdb" without first checking that a matching
                 ;; library exists in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Search ncbi-vdb's "ilib" directory for interface
                 ;; libraries.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Link libmagic dynamically.
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))

               ;; The 'configure' script rejects options such as
               ;; '--enable-fast-install', so it is invoked manually.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" vdb)
                       (string-append "--with-ncbi-vdb-build=" vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
4863
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar  (assoc-ref %build-inputs "tar"))
               (bzip (assoc-ref %build-inputs "bzip2"))
               (out  (assoc-ref %outputs "out"))
               (doc  (assoc-ref %outputs "doc")))
           ;; tar and bzip2 are the only programs needed on PATH.
           (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
           ;; Only continue when unpacking succeeded.  The builder evaluates
           ;; to #f if tar exits with a non-zero status and to #t once the
           ;; headers and the documentation have been copied.
           (and (zero? (system* "tar" "xvf"
                                (assoc-ref %build-inputs "source")))
                (begin
                  (chdir (string-append "seqan-library-" ,version))
                  ;; Headers go to "out", everything under "share" to "doc".
                  (copy-recursively "include" (string-append out "/include"))
                  (copy-recursively "share" (string-append doc "/share"))
                  #t))))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
4906
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       ;; Use the 'pypi-uri' helper, as python-screed does, instead of
       ;; spelling out the PyPI download URL by hand.
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32
         "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
    (build-system python-build-system)
    (arguments
     ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Current test in setup.py does not work as of 0.6.1,
         ;; so use nose to run tests instead for now.  See
         ;; https://github.com/fhcrc/seqmagick/issues/55
         (replace 'check (lambda _ (zero? (system* "nosetests")))))))
    (inputs
     ;; biopython-1.66 is required due to
     ;; https://github.com/fhcrc/seqmagick/issues/59
     ;; When that issue is resolved the 'python2-biopython-1.66' package
     ;; should be removed.
     `(("python-biopython" ,python2-biopython-1.66)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
4947
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/seqtk/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove extraneous header files, as is done in the seqtk
                  ;; master branch.
                  (for-each delete-file
                            (list "ksort.h" "kstring.h" "kvec.h"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; No "configure" script is run for this package.
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.
           (lambda _ (zero? (system* "./seqtk" "seq"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               ;; End the phase with an explicit true value, like the other
               ;; install phases in this file.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
4990
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; No "configure" script is run for this package.
         (delete 'configure)
         ;; Run the "unit_tests" executable as the test suite.
         (replace 'check (lambda _ (zero? (system* "./unit_tests"))))
         ;; Install the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library for the built program rather than a build tool, so
    ;; it is declared as a regular input, as in the other packages of this
    ;; file.  NOTE(review): presumably the executables link against it --
    ;; confirm against the upstream Makefile.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
5031
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/biocore/sortmerna/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    ;; "out" receives the executables, "db" the sequence databases.
    (outputs '("out" "db"))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out")
                                           "/bin"))
                    (dbdir  (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases")))
               ;; The two executables.
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               ;; The FASTA files found below "rRNA_databases".
               (for-each (lambda (database)
                           (install-file database dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    (license license:lgpl3)))
5076
(define-public star
  (package
    (name "star")
    (version "2.5.3a")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "013wirlz8lllgjyagl48l75n1isxyabqb3sj7qlsl0x1rmvqw99a"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Call "rm" by name rather than through "/bin/rm".
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Drop the pre-built binaries and the bundled htslib sources.
           (for-each delete-file-recursively
                     '("bin/MacOSX_x86_64"
                       "bin/Linux_x86_64"
                       "bin/Linux_x86_64_static"
                       "source/htslib"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags '("STAR")
       #:tests? #f                      ;no check target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The Makefile lives in the "source" sub-directory.
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "source")
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop "htslib" from the prerequisites of Depend.list.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled htslib headers into
             ;; angle-bracket includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         ;; Install the "STAR" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "STAR" bindir)
               #t))))))
    (native-inputs
     `(("vim" ,vim)))                   ; for xxd
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; According to the README only 64-bit systems are supported.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
5150
(define-public subread
  (package
    (name "subread")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0gn5zhbvllks0mmdg3qlmsbg91p2mpdc2wixwfqpi85yzfrh8hcy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               (if (string-prefix? "x86_64" system)
                   (string-append "CCFLAGS=" (string-join (append flags flags64)))
                   (string-append "CCFLAGS=" (string-join flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       ;; Use 'modify-phases' like the other packages in this file rather
       ;; than the legacy 'alist-cons-after' / 'alist-replace' /
       ;; 'alist-delete' helpers.
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; The Makefiles are located in the "src" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         ;; Copy the contents of "../bin" (relative to "src") to the "bin"
         ;; directory of the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin)
               ;; End the phase with an explicit true value instead of the
               ;; unspecified value returned by 'copy-recursively'.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
5205
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32
         "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       ;; Remove the bundled samtools sources.
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure)
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; Drop a typedef that conflicts with the one in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; The "stringtie" target must not depend on a libbam.a
             ;; located under ${BAM}.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers through their "samtools/"
             ;; prefix.
             (substitute* '("gclib/GBam.h" "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         ;; Install the "stringtie" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "stringtie" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
5267
;; VCFtools, a program package for working with VCF files.  Installation
;; locations are passed to make as PREFIX and MANDIR flags.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.14")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/vcftools/vcftools/releases/download/v"
                    version "/vcftools-" version ".tar.gz"))
              (sha256
               (base32
                "10l5c07z9p4i9pr4gl54b2c9h6ndhqlbq1rashg2zcgwkbfrkmvn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags (list
                     "CFLAGS=-O2"       ; override "-m64" flag
                     (string-append "PREFIX=" (assoc-ref %outputs "out"))
                     (string-append "MANDIR=" (assoc-ref %outputs "out")
                                    "/share/man/man1"))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
5303
;; Infernal, a covariance-model based search tool for RNA structure and
;; sequence similarities.  Restricted to x86 systems (see the comment on
;; `supported-systems' below).
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://eddylab.org/software/infernal/"
                                  "infernal-" version ".tar.gz"))
              (sha256
               (base32
                "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
5331
;; CENTIPEDE R package for predicting transcription factor binding sites;
;; the tarball is fetched from R-Forge rather than CRAN or Bioconductor.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://download.r-forge.r-project.org/"
                                  "src/contrib/CENTIPEDE_" version ".tar.gz"))
              (sha256
               (base32
                "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
5355
;; vegan, a CRAN package with tools for descriptive community ecology.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.4-3")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "vegan" version))
       (sha256
        (base32
         "15zcxfix2d854897k1lr0sfmj2n00339nlsppcr3zrb238lb2mi5"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)
       ("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ("r-lattice" ,r-lattice)
       ("r-mass" ,r-mass)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (license license:gpl2+)))
5385
;; Bioconductor "annotate" package: R environments for microarray annotation.
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.52.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "annotate" version))
       (sha256
        (base32
         "0yymz8qxgnbybvfhqrgkd1hh9dhwxdii1yxkhr1zicjgb35xixxb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (home-page
     "http://bioconductor.org/packages/annotate")
    (synopsis "Annotation for microarrays")
    (description "This package provides R environments for the annotation of
microarrays.")
    (license license:artistic2.0)))
5412
;; Bioconductor "geneplotter" package: plotting functions for genomic data.
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "geneplotter" version))
       (sha256
        (base32
         "1p6yvxi243irhjxwm97hp73abhwampj0myyf8z00ij166674pc7h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://bioconductor.org/packages/geneplotter")
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (license license:artistic2.0)))
5437
;; Bioconductor "genefilter" package: functions for filtering genes from
;; high-throughput experiments.  gfortran is a build-time (native) input.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genefilter" version))
       (sha256
        (base32
         "1vzgciqd09csqcw9qync8blsv51ylrd86a65iadgyy6j26g01fwd"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-survival" ,r-survival)))
    (home-page "http://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
5464
;; Bioconductor "DESeq2" package: differential expression analysis of count
;; data.  The upstream name differs in case from the Guix package name, hence
;; the `upstream-name' property.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.14.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq2" version))
       (sha256
        (base32
         "1walwkqryn1gnwz7zryr5764a0p6ia7ag4w6w9n8fskg8dkg0fqs"))))
    (properties `((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
5501
;; Bioconductor "AnnotationForge" package: code for generating annotation
;; packages and their databases.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationForge" version))
       (sha256
        (base32
         "0l1g9hy88sh5g567svyfd8pnjvkyklkn6a3gjn8zalvh62qqjjq1"))))
    (properties
     `((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
5531
;; Bioconductor "RBGL" package: R interface to the Boost graph algorithms.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.50.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RBGL" version))
       (sha256
        (base32
         "1q14m8w6ih56v680kf3d9wh1qbgp7af33kz3cxafdf1vvzx9km08"))))
    (properties `((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs `(("r-graph" ,r-graph)))
    (home-page "http://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
5552
;; Bioconductor "GSEABase" package: classes and methods supporting Gene Set
;; Enrichment Analysis.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GSEABase" version))
       (sha256
        (base32
         "0l2x7yj7lfb0m2dmsav5ib026dikpgl4crdckrnj776yy08lgxpj"))))
    (properties `((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
5579
;; Bioconductor "Category" package: a collection of tools for category
;; analysis.
(define-public r-category
  (package
    (name "r-category")
    (version "2.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Category" version))
       (sha256
        (base32
         "16ncwz7b4y48k0p3fvbrbmvf7nfz63li9ysgcl8kp9kl4hg7llng"))))
    (properties `((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-rsqlite" ,r-rsqlite)))
    (home-page "http://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
5610
;; Bioconductor "GOstats" package: tools for interacting with GO and
;; microarray data.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GOstats" version))
       (sha256
        (base32
         "0g2czm94zhzx92z7y2r4mjfxhwml7bhab2db6820ks8nkw1zvr9n"))))
    (properties `((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)))
    (home-page "http://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
5640
;; Bioconductor "ShortRead" package: sampling, iteration and input of FASTQ
;; files.  zlib is a plain input; the R dependencies are propagated.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.32.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ShortRead" version))
       (sha256
        (base32
         "1m7lbfxs7xwcy9xs76zy5rky2mb96anvh457xfw60lh3kygwfpxc"))))
    (properties `((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "http://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
5682
;; Bioconductor "systemPipeR" package: tools for building and running
;; next generation sequencing analysis workflows.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "systemPipeR" version))
       (sha256
        (base32
         "0hyi841w8fm2yzpm6lwqi3jz5kc8ny8dy5p29dxynzaw5bpjw56d"))))
    (properties `((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
5728
;; Bioconductor "groHMM" package: a pipeline for the analysis of GRO-seq data.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "groHMM" version))
       (sha256
        (base32
         "0d91nyhqbi5hv3mgmr2z0g29wg2md26g0hyv5mgapmz20cd9zi4y"))))
    (properties `((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (license license:gpl3+)))
5755
;; Annotation data package exposing the UCSC hg19 "knownGene" track as a TxDb
;; object.  It is marked non-substitutable (see the comment on `arguments').
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "http://bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib"
                                  "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     `((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (home-page
     "http://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (license license:artistic2.0)))
5787
;; VSEARCH, a sequence search tool for metagenomics.  The bundled cityhash
;; sources are deleted in the snippet and the "cityhash" input is used
;; instead; the build system files are regenerated with autoreconf.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.4.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hc110ycqpa54nr6x173qg7190hk08qp7yz7zzqxlsypqnpc5zzp"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources. The vsearch source is adjusted
           ;; for this in the patch.
           (delete-file "src/city.h")
           (delete-file "src/citycrc.h")
           (delete-file "src/city.cc")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The phase succeeds only when autoreconf exits with status 0.
         (add-before 'configure 'autogen
           (lambda _ (zero? (system* "autoreconf" "-vif")))))))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    (home-page "https://github.com/torognes/vsearch")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
5838
;; ParDRe, an MPI-based tool to remove duplicate DNA reads.  There is no
;; configure step, and the single "ParDRe" binary is installed by hand.
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       ;; The upstream version is spelled out here because `version' carries
       ;; the extra "-1" suffix.
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           "1.1.5" ".tar.gz"))
       (sha256
        (base32
         "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "ParDRe" bin)
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (home-page "https://sourceforge.net/projects/pardre/")
    (license license:gpl3+)))
5879
;; Ruby bindings to the kseq.h FASTA/FASTQ parser.  Tests are run through the
;; "spec" target.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32
         "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    (arguments
     `(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (home-page "https://github.com/gusevfe/bio-kseq")
    (license license:expat)))
5907
;; bio-locus, a Ruby gem for fast querying of genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32
         "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (home-page "https://github.com/pjotrp/bio-locus")
    (license license:expat)))
5931
;; bio-blastxmlparser, a Ruby gem providing a BLAST XML parser usable as a
;; library and as a command line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; rspec is a test framework needed only while building, so it is a
    ;; native input, as in the other Ruby packages of this file.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
5955
;; BioRuby, the "bio" gem: Ruby libraries and tools for bioinformatics.  The
;; test files hard-code program locations such as /bin/sh, so a phase rewrites
;; them to the absolute file names found with `which' at build time.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; Clauses are applied to each line in the order given, which
             ;; is the order the original separate passes ran in.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; Plain list of two files: the original had a stray comma
             ;; after the first name, which only worked because the
             ;; enclosing quasiquote evaluated the unquoted string.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
5998
;; CRAN "ACSNMineR" package: gene set enrichment or depletion analysis based
;; on maps from the Atlas of Cancer Signalling Networks.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "ACSNMineR" version))
              (sha256
               (base32
                "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    (properties `((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    ;; HTTPS, like the other CRAN home page in this file.
    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (license license:gpl2+)))
6024
;; Bioconductor "BiocGenerics" package: S4 generic functions used by many
;; Bioconductor packages.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocGenerics" version))
              (sha256
               (base32
                "06szdz7dfs1iyv5zdl4fjzad18nnf1zf3wvglc6c6yd9mrqlf7vk"))))
    (properties
     `((upstream-name . "BiocGenerics")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/BiocGenerics")
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (license license:artistic2.0)))
6044
;; Bioconductor "BiocInstaller" package: installs and updates R packages.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocInstaller" version))
              (sha256
               (base32
                "0y1y5wmy6lzjqx3hdg15n91d417ccjj8dbvdkhmp99bs5aijwcpn"))))
    (properties
     `((upstream-name . "BiocInstaller")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/BiocInstaller")
    (synopsis "Install Bioconductor packages")
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (license license:artistic2.0)))
6063
;; Bioconductor "biocViews" package: creates HTML pages that categorize the
;; packages of a Bioconductor repository by keyword.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biocViews" version))
              (sha256
               (base32
                "07rjk10b91pkriyq297w86199r2d3sfji3ggs9mq2gyalsa8y4b6"))))
    (properties
     `((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-knitr" ,r-knitr)
       ("r-runit" ,r-runit)))
    (home-page "http://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
6091
;; Bioconductor "BiocStyle" package: standard formatting styles for
;; Bioconductor PDF and HTML documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.2.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocStyle" version))
              (sha256
               (base32
                "0sl99xw940ixrm6v24lgaw3ljh56g59a6rdz7g160hx84z9f8n2n"))))
    (properties
     `((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
6111
;; Bioconductor package "BiocCheck".  Besides the regular R installation, an
;; extra phase links the package's command-line scripts into "bin" of the
;; output.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocCheck" version))
       (sha256
        (base32 "197kpiycyl3qawm6801fxyxj81d2g57a00qxaqprapsf1d140l52"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the file names are appended below, so the link targets
                    ;; do not contain a doubled "//".
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script under the same file name in DEST-DIR.
               (for-each
                (lambda (script)
                  (symlink (string-append script-dir "/" script)
                           (string-append dest-dir "/" script)))
                '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (native-inputs
     `(("which" ,which)))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)))
    (home-page "http://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
6158
;; CRAN package "getopt" (fetched through `cran-uri'); no inputs declared.
(define-public r-getopt
  (package
    (name "r-getopt")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "getopt" version))
              (sha256
               (base32
                "00f57vgnzmg7cz80rjmjz1556xqcmx8nhrlbbhaq4w7gl2ibl87r"))))
    (build-system r-build-system)
    (home-page "https://github.com/trevorld/getopt")
    (synopsis "Command-line option processor for R")
    (description
     "This package is designed to be used with Rscript to write shebang
scripts that accept short and long options. Many users will prefer to
use the packages @code{optparse} or @code{argparse} which add extra
features like automatically generated help options and usage texts,
support for default values, positional argument support, etc.")
    (license license:gpl2+)))
6180
;; CRAN package "optparse"; propagates r-getopt.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.3.2")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "optparse" version))
              (sha256
               (base32
                "1g8as89r91xxi5j5azsd6vrfrhg84mnfx2683j7pacdp8s33radw"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-getopt" ,r-getopt)))
    (home-page "https://github.com/trevorld/optparse")
    (synopsis "Command line option parser")
    (description
     "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (license license:gpl2+)))
6203
;; Bioconductor package "DNAcopy".
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DNAcopy" version))
       (sha256
        (base32 "1idyvfvy7xx8k9vk00y4k3819qmip8iqm809j3vpxabmsn7r9zyh"))))
    (properties '((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    ;; NOTE(review): presumably required because the package ships Fortran
    ;; sources -- confirm against the upstream tarball.
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
6225
;; Bioconductor package "S4Vectors"; propagates r-biocgenerics.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.12.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "0syx0qgipx97zsp3b8afhzamsr30835a2a99yb9wnq7b50g3v3p1"))))
    (properties '((upstream-name . "S4Vectors")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
6252
;; CRAN package "seqinr"; zlib is a plain input, the R dependencies are
;; propagated.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.3-6")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqinr" version))
              (sha256
               (base32
                "13d0qxm2244wgdl2dy2s8vnrnf5fx4n47if9gkb49dqx6c0sx8s2"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqinr.r-forge.r-project.org/")
    (synopsis "Biological sequences retrieval and analysis")
    (description
     "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (license license:gpl2+)))
6277
;; Bioconductor package "IRanges"; propagates r-biocgenerics and r-s4vectors.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "0x8h74ik3xwdnwrkn89hq5ll0qa1lp9jgzlbmpa02dpws7snfwyr"))))
    (properties '((upstream-name . "IRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
6306
;; Bioconductor package "GenomeInfoDb".
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.10.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "18g24cf6b3vi13w85ki2mam6i2gl4yxr1zchyga34xc3dkdngzrw"))))
    (properties '((upstream-name . "GenomeInfoDb")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
6333
;; Bioconductor package "edgeR"; propagates r-limma, r-locfit and r-statmod.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.16.5")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "04vpa0a6dkkjyvvfbkmfjyaxf2ldkagi66g028qpaszd8jsk8yiv"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-statmod" ,r-statmod))) ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The sentence about other data types previously read "It be applied";
    ;; the missing verb has been restored.
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can also be applied to differential signal
analysis of other types of genomic data that produce counts, including
ChIP-seq, SAGE and CAGE.")
    (license license:gpl2+)))
6360
;; Bioconductor package "VariantAnnotation"; zlib is a plain input, all R
;; dependencies are propagated.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.20.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "10v8apgfw57nd4dxmdxdrijxpw135jpp2p8wrk3wjpb3hhfjp1qj"))))
    (properties '((upstream-name . "VariantAnnotation")))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
6398
;; Bioconductor package "limma"; no inputs declared.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.30.13")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "1ji8kb19anwq2505zii2kzqlrnk75mk1mpz8vy4s1mckzs1cz4m0"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
6417
;; Bioconductor package "XVector".  The references to zlibbioc are removed
;; from the package metadata and zlib is supplied as an input instead.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.14.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "1j14ip4c260kdp3zcmgfa2v8ky88csa0gfdg6a1xsb64s03hdbm6"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Delete "zlibbioc, " from DESCRIPTION and the matching import
         ;; directive from NAMESPACE.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/XVector")
    ;; Spelling fixed: this used to read "manpulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
6453
;; Bioconductor package "GenomicRanges".
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.26.4")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "1789ycqzv20d8p1axkxrhsz9v0ww6w1dk2mfvm85p8j53zd1f67c"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The opening sentence was ungrammatical ("provides tools to ... is
    ;; playing a central role"); it is now a single well-formed sentence.
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, which play a central role when it comes to
analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
6482
;; Bioconductor package "Biobase"; propagates r-biocgenerics.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "0js9j9wqls8f571ifl9ylllbb9a9hwf7b7drf2grwb1fl31ldazl"))))
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
6504
;; Bioconductor package "AnnotationDbi".
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.36.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "0574lmyisn3nv9aicz9x3iivx990da4q2j4i0f1jz0mpj9v3vc2w"))))
    (properties '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
6531
;; Bioconductor package "biomaRt"; propagates r-annotationdbi, r-rcurl and
;; r-xml.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "1x0flcghq71784q2l02j0g4f9jkmyb14f6i307n6c59d6ji7h7x6"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL as its first argument and the displayed
    ;; text as the second; the two were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
6561
;; Bioconductor package "BiocParallel"; the license field lists two licenses.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "18zpa0vl375n9pvxsgbid1k96m17nqqgv1g1sfnlmm7kj34jxg6v"))))
    (properties '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)))
    (home-page "http://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
6585
;; Bioconductor package "Biostrings".
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.42.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "0vqgd9i6y3wj4zviqwgvwgd4qj6033fg01rmx1cw9bw5i8ans42d"))))
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
6611
;; Bioconductor package "Rsamtools".  The references to zlibbioc are removed
;; from the package metadata and zlib is supplied as an input instead.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.26.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "118nsajgghi4cy3h0wi7777kc70a5j1fdyxv5n1dy01glix2z4qk"))))
    (properties '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Delete "zlibbioc, " from DESCRIPTION and the matching import
         ;; directive from NAMESPACE.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
6655
;; Bioconductor package "SummarizedExperiment".
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "1kbj8sg2ik9f8d6g95wz0py62jldg01qy5rsdpg1cxw95nf7dzi3"))))
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
6685
;; Bioconductor package "GenomicAlignments".
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "1dilghbsyf64iz5c0kib2c7if72x7almd5w3ali09a2b2ff2mcjk"))))
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
6718
;; Bioconductor package "rtracklayer".  The references to zlibbioc are removed
;; from the package metadata and zlib is supplied as an input instead.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.34.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "1j3cyvg1wg1d9l0lkcjk3jn7pb96zi17nd1qsa5lglsimja19mpl"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Delete "zlibbioc, " from DESCRIPTION and the matching import
         ;; directive from NAMESPACE.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
6763
;; Bioconductor package "GenomicFeatures".
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.26.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "1y16lqach0v3ym5zhdhj4r2imfi0kpa0djlb51hj85yf7xkzwdlb"))))
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
6804
;; Bioconductor annotation package "GO.db".  The source URL is assembled by
;; hand from the "release/data/annotation" location rather than through
;; `bioconductor-uri'.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "02cj8kqi5w39jwcs8gp1dgj08sah262ppxnkz4h3qd0w191y8yyl"))))
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
6828
;; Bioconductor package "graph"; propagates r-biocgenerics.
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "graph" version))
       (sha256
        (base32 "0g3dk5vsdp489fmyg8mifczmzgqrjlakkkr8i96dj15gghp3l135"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description
     "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
6847
;; Bioconductor package "topGO".
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "0j6sgvam4lk9348ag6pypcbkv93x4fk0di8ivhr23mz2s2yqzwrx"))))
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "http://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
6880
;; Bioconductor package "BSgenome".
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "0hxwc02h5mzhkrk60d1jmlsfjf0ai9jxdc0128kj1sg4r2k1q94y"))))
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
6910
;; Bioconductor annotation package "BSgenome.Hsapiens.1000genomes.hs37d5";
;; substitutes are disabled for it (see the comment on ARGUMENTS).
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.1000genomes.hs37d5_"
                           version ".tar.gz"))
       (sha256
        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    (properties
     '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (license license:artistic2.0)))
6941
;; Bioconductor package "impute".
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "1164zvnikbjd0ybdn9xwn520rlmdjd824vmhnl83zgv3v9lzp9bm"))))
    ;; NOTE(review): presumably required because the package ships Fortran
    ;; sources -- confirm against the upstream tarball.
    (inputs
     `(("gfortran" ,gfortran)))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
6961
;; Bioconductor package "seqPattern".
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0lsa5pz36xapi3yiv78k3z286a5md5sm5g21pgfyg8zmhmkxr7y8"))))
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "http://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
6988
;; Bioconductor package "genomation".
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1m4mz7wihj8yqivwkzw68div8ybk4rjsai3ffki7xp7sh21ax03y"))))
    (build-system r-build-system)
    ;; zlib is a plain (non-propagated) input of the build.
    (inputs
     `(("zlib" ,zlib)))
    ;; R packages that must be present at run time.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (license license:artistic2.0)))
7037
;; Experiment-data companion of the genomation package, fetched from the
;; Bioconductor "data/experiment" area.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/experiment/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "release/data/experiment/src/contrib/"
                                  "genomationData_" version ".tar.gz"))
              (sha256
               (base32
                "16dqwb7wx1igx77zdbcskx5m1hs4g4gp2hl56zzm70hcagnlkz8y"))))
    ;; The upstream name is mixed-case, so record it explicitly, as is done
    ;; for the other mixed-case Bioconductor packages in this file.
    (properties
     `((upstream-name . "genomationData")))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
7065
;; Bioconductor annotation package "org.Hs.eg.db".
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/org.Hs.eg.db_"
             version ".tar.gz"))
       (sha256
        (base32 "19mg64pw8zcvb9yxzzyf7caz1kvdrkfsj1hd84bzq7crrh8kc4y6"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Hs.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Human")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
    (home-page "http://www.bioconductor.org/packages/org.Hs.eg.db/")
    (license license:artistic2.0)))
7091
;; Bioconductor annotation package "org.Ce.eg.db".
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/org.Ce.eg.db_"
             version ".tar.gz"))
       (sha256
        (base32 "12llfzrrc09kj2wzbisdspv38qzkzgpsbn8kv7qkwg746k3pq436"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Ce.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (home-page "http://www.bioconductor.org/packages/org.Ce.eg.db/")
    (license license:artistic2.0)))
7117
;; Bioconductor annotation package "org.Dm.eg.db".
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/org.Dm.eg.db_"
             version ".tar.gz"))
       (sha256
        (base32 "1vzbphbrh1cf7xi5cksia9xy9a9l42js2z2qsajvjxvddiphrb7j"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Dm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (home-page "http://www.bioconductor.org/packages/org.Dm.eg.db/")
    (license license:artistic2.0)))
7143
;; Bioconductor annotation package "org.Mm.eg.db".
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/org.Mm.eg.db_"
             version ".tar.gz"))
       (sha256
        (base32 "1lykjqjaf01fmgg3cvfcvwd5xjq6zc5vbxnm5r4l32fzvl89q50c"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Mm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (home-page "http://www.bioconductor.org/packages/org.Mm.eg.db/")
    (license license:artistic2.0)))
7169
;; Bioconductor package "seqLogo"; the mixed-case upstream name is recorded
;; in the package properties.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "18bajdl75h3039559d81rgllqqvnq8ygsfxfx081xphxs0v6xggy"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "seqLogo")))
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (home-page "http://bioconductor.org/packages/seqLogo")
    (license license:lgpl2.0+)))
7190
;; Bioconductor annotation package "BSgenome.Hsapiens.UCSC.hg19".
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg19_"
             version ".tar.gz"))
       (sha256
        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    ;; Substitutes are disabled: the package is little more than one very
    ;; large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (license license:artistic2.0)))
7221
;; Bioconductor annotation package "BSgenome.Mmusculus.UCSC.mm9".
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/BSgenome.Mmusculus.UCSC.mm9_"
             version ".tar.gz"))
       (sha256
        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    ;; Substitutes are disabled: the package is little more than one very
    ;; large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (license license:artistic2.0)))
7252
;; Bioconductor annotation package "BSgenome.Mmusculus.UCSC.mm10".
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/BSgenome.Mmusculus.UCSC.mm10_"
             version ".tar.gz"))
       (sha256
        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    ;; Substitutes are disabled: the package is little more than one very
    ;; large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (license license:artistic2.0)))
7284
;; Bioconductor annotation package "TxDb.Mmusculus.UCSC.mm10.knownGene".
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/"
             "TxDb.Mmusculus.UCSC.mm10.knownGene_" version ".tar.gz"))
       (sha256
        (base32 "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
    (build-system r-build-system)
    (properties '((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    ;; Substitutes are disabled: the package is little more than one very
    ;; large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (home-page
     "http://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (license license:artistic2.0)))
7319
;; Bioconductor annotation package "BSgenome.Celegans.UCSC.ce6".
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/BSgenome.Celegans.UCSC.ce6_"
             version ".tar.gz"))
       (sha256
        (base32 "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    ;; Substitutes are disabled: the package is little more than one very
    ;; large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (license license:artistic2.0)))
7351
;; Bioconductor annotation package "BSgenome.Celegans.UCSC.ce10".
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/BSgenome.Celegans.UCSC.ce10_"
             version ".tar.gz"))
       (sha256
        (base32 "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    ;; Substitutes are disabled: the package is little more than one very
    ;; large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (license license:artistic2.0)))
7383
;; Bioconductor annotation package "BSgenome.Dmelanogaster.UCSC.dm3".
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used here.
       (uri (string-append
             "http://www.bioconductor.org/packages/release/"
             "data/annotation/src/contrib/BSgenome.Dmelanogaster.UCSC.dm3_"
             version ".tar.gz"))
       (sha256
        (base32 "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    ;; Substitutes are disabled: the package is little more than one very
    ;; large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (license license:artistic2.0)))
7415
;; Bioconductor package "motifRG"; the mixed-case upstream name is recorded
;; in the package properties.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32
         "1pa97aj6c5f3gx4bgriw110764dj3m9h104ddi8rv2bpy41yd98d"))))
    (properties `((upstream-name . "motifRG")))
    (build-system r-build-system)
    ;; Input labels follow the Guix variable names (dash-separated), like
    ;; every other input list in this file.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
7442
;; CRAN package "qtl" (R/qtl).
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.40-8")
    (source
     (origin
       (method url-fetch)
       ;; Use the cran-uri helper, as the other CRAN packages in this file
       ;; do, instead of spelling out the mirror URL by hand.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "05bj1x2ry0i7yqiydlswb3d2h4pxg70z8w1072az1mrv1m54k8sp"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
7467
;; Bioconductor package "zlibbioc".
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "0hbk90q5hl0fycfvy5nxxa4hxgglag9lzp7i0fg849bqygg5nbyq"))))
    (build-system r-build-system)
    (properties '((upstream-name . "zlibbioc")))
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (license license:artistic2.0)))
7486
;; R4RNA, downloaded directly from the upstream e-rna.org site.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.e-rna.org/r-chie/files/"
                    "R4RNA_" version ".tar.gz"))
              (sha256
               (base32
                "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (license license:gpl3+)))
7509
;; Bioconductor package "Rhtslib"; the capitalised upstream name is recorded
;; in the package properties.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "1vk3ng61dhi3pbia1lp3gl3mlr3i1vb2lkq83qb53i9dzz128wh9"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "Rhtslib")))
    ;; autoconf is only needed on the build side.
    (native-inputs
     `(("autoconf" ,autoconf)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (home-page "https://github.com/nhayden/Rhtslib")
    (license license:lgpl2.0+)))
7536
;; Bioconductor package "bamsignals".
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "1k42gvk5mgq4la1fp0in3an2zfdz69h6522jsqhmk0f6i75kg4mb"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    ;; R packages that must be present at run time.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (home-page "http://bioconductor.org/packages/bamsignals")
    (license license:gpl2+)))
7566
;; RCAS R library, built from the tagged GitHub archive tarball.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/BIMSBbioinfo/RCAS/"
             "archive/v" version ".tar.gz"))
       ;; Give the downloaded archive a name derived from the package.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1hd0r66556bxbdd82ksjklq7nfli36l4k6y88ic7kkg9873wa1nw"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("r-testthat" ,r-testthat)
       ;; knitr looks for "pandoc-citeproc" in PATH while the vignettes
       ;; are being built.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)))
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-biomart" ,r-biomart)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-topgo" ,r-topgo)
       ("r-dt" ,r-dt)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-motifrg" ,r-motifrg)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:expat)))
7613
;; Web front end for RCAS, built with the GNU build system from the upstream
;; release tarball.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/BIMSBbioinfo/rcas-web/releases/"
             "download/v" version "/rcas-web-" version ".tar.gz"))
       (sha256
        (base32 "0d3my0g8i7js59n184zzzjdki7hgmhpi4rhfvk7i6jsw01ba04qq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; After installation, wrap bin/rcas-web so that it runs with the
         ;; Guile load paths of guile-json and guile-redis and with the
         ;; R_LIBS_SITE value of the build environment.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((site-dir
                     ;; Guile 2.2 site directory of the input named LABEL.
                     (lambda (label)
                       (string-append (assoc-ref inputs label)
                                      "/share/guile/site/2.2")))
                    (load-path (string-append (site-dir "guile-json")
                                              ":"
                                              (site-dir "guile-redis")))
                    (program (string-append (assoc-ref outputs "out")
                                            "/bin/rcas-web")))
               (wrap-program program
                 `("GUILE_LOAD_PATH" ":" = (,load-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,load-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile2.2-json)
       ("guile-redis" ,guile2.2-redis)))
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (license license:agpl3+)))
7657
;; Bioconductor package "MutationalPatterns".
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.0.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "1a3c2bm0xx0q4gf98jiw74msmdf2fr8rbsdysd5ww9kqlzmsbr17"))))
    (build-system r-build-system)
    ;; R packages that must be present at run time.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (home-page "http://bioconductor.org/packages/MutationalPatterns/")
    (license license:expat)))
7690
;; CRAN package "WGCNA"; the upper-case upstream name is recorded in the
;; package properties.
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.51")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "WGCNA" version))
              (sha256
               (base32
                "0hzvnhw76vwg8bl8x368f0c5szpwb8323bmrb3bir93i5bmfjsxx"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "WGCNA")))
    ;; R packages that must be present at run time.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-doparallel" ,r-doparallel)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-fastcluster" ,r-fastcluster)
       ("r-foreach" ,r-foreach)
       ("r-go-db" ,r-go-db)
       ("r-hmisc" ,r-hmisc)
       ("r-impute" ,r-impute)
       ("r-matrixstats" ,r-matrixstats)
       ("r-preprocesscore" ,r-preprocesscore)))
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (license license:gpl2+)))
7726
;; ChIPKernels, built from a pinned Git commit.  The version string is
;; "1.1-" followed by the revision and the first nine characters of the
;; commit hash.
(define-public r-chipkernels
  (let ((revision "1")
        (commit "c9cfcacb626b1221094fb3490ea7bac0fd625372"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ManuSetty/ChIPKernels.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      ;; R packages that must be present at run time.
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (license license:gpl2+))))
7762
;; SeqGL, built from the tagged GitHub archive tarball.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/ManuSetty/SeqGL/archive/"
                    version ".tar.gz"))
              ;; Give the downloaded archive a name derived from the package.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (home-page "https://github.com/ManuSetty/SeqGL")
    (license license:gpl2+)))
7792
;; CRAN package "gkmSVM"; the mixed-case upstream name is recorded in the
;; package properties.
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.71.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gkmSVM" version))
              (sha256
               (base32
                "1zpxgxmf2nd5j5wn00ps6kfxr8wxh7d1swr1rr4spq7sj5z5z0k0"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "gkmSVM")))
    ;; R packages that must be present at run time.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (home-page "http://cran.r-project.org/web/packages/gkmSVM")
    (license license:gpl2+)))
7825
;; tximport from Bioconductor; it declares no inputs of its own.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32 "1k5a7dad6zqg936s17f6cmwgqp11x24z9zhxndsgwbscgpyhpcb0"))))
    (build-system r-build-system)
    (synopsis
     "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (home-page "http://bioconductor.org/packages/tximport")
    (license license:gpl2+)))
7847
;; rhdf5 from Bioconductor.  The package ships a tarball of HDF5 sources
;; ("src/hdf5source/hdf5small.tgz"); it is unpacked in a dedicated phase so
;; that the extracted configure script can be patched before the regular
;; build runs.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5" version))
              (sha256
               (base32
                "0pb04li55ysag30s7rap7nnivc0rqmgsmpj43kin0rxdabfn1w0k"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-smallhdf5
           (lambda _
             ;; Extract the bundled HDF5 sources into "src/".  The phase
             ;; fails when tar fails; previously its exit status was
             ;; ignored and the phase carried on regardless.
             (and (zero? (system* "tar" "-xzvf"
                                  "src/hdf5source/hdf5small.tgz"
                                  "-C" "src/"))
                  (begin
                    ;; The sources are already unpacked in place, so drop
                    ;; the Makevars lines that would unpack, remove or move
                    ;; them again.
                    (substitute* "src/Makevars"
                      (("^.*cd hdf5source &&.*$") "")
                      (("^.*gunzip -dc hdf5small.tgz.*$") "")
                      (("^.*rm -rf hdf5.*$") "")
                      (("^.*mv hdf5source/hdf5 ..*$") ""))
                    ;; Replace the absolute "/bin/mv" with a plain "mv"
                    ;; that is looked up in PATH.
                    (substitute* "src/hdf5/configure"
                      (("/bin/mv") "mv"))
                    #t)))))))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
7890
;; EMBOSS, built with the GNU build system.  Two source files are patched
;; right after unpacking (configure.in and Makefile.am), and the build
;; machinery is regenerated with autoreconf before 'configure' runs.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version)
                           ".0/EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER checks look for libgd, libpng and zlib but
             ;; expect all three under one common prefix.  Replace the
             ;; check with fixed linker flags and definitions.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'unpack 'disable-update-check
           (lambda _
             ;; There is no Internet access at build time, so looking for
             ;; updates cannot work.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (synopsis "Molecular biology analysis suite")
    (description
     "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (home-page "http://emboss.sourceforge.net")
    (license license:gpl2+)))
7953
;; BITS, built from a pinned git commit.  There is no configure script and
;; no test suite; the binaries produced in "bin" are copied to the output.
(define-public bits
  (let ((commit "3cc4567896d9d6442923da944beb704750a08d2d")
        (revision "1"))
    (package
      (name "bits")
      ;; No release archives have been published as yet, but the version is
      ;; 2.13.0 nonetheless.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Blank out every Makefile line mentioning a "_cuda" target
               ;; and strip the line continuation that follows
               ;; "bits_test_intersections".
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ target) target))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (copy-recursively "bin" (string-append out "/bin"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (synopsis "Implementation of binary interval search algorithm")
      (description
       "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (home-page "https://github.com/arq5x/bits")
      (license license:gpl2))))
7999
(define-public piranha
  ;; There is no release tarball for the latest version.  The latest commit is
  ;; older than one year at the time of this writing.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/piranha.git")
                      (commit commit)))
                ;; Name the checkout after the package and version, like the
                ;; other git checkouts in this file.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Copy every file of the separately fetched smithlab_cpp
               ;; checkout into the source tree.
               (for-each (lambda (file)
                           (install-file file "./src/smithlab_cpp/"))
                         (find-files (assoc-ref inputs "smithlab-cpp")))
               #t))
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy everything found under the build tree's "bin"
               ;; directory to the output's "bin" directory.
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files "bin" ".*")))
               #t)))
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools")
                              "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools")
                              "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
8065
;; PePr from PyPI.  It is built with Python 2 and has no test suite.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Use the shared PyPI URL helper, as other PyPI packages in
              ;; this file do, instead of a hand-built URL.
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and looks for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
8094
;; filevercmp, fetched as a GitHub archive of a pinned commit.  There is no
;; configure script and no test suite; the single "filevercmp" executable is
;; installed by hand.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Signal success explicitly rather than relying on the
                 ;; unspecified return value of 'install-file'.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
8122
;; MultiQC from PyPI, with one local patch applied to the source.
(define-public multiqc
  (package
    (name "multiqc")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (patches (search-patches "multiqc-fix-git-subprocess-error.patch"))
       (sha256
        (base32 "12gs1jw2jrxrij529rnl5kaqxfcqn15yzcsggxkfhdx634ml0cny"))))
    (build-system python-build-system)
    ;; Upstream plans to introduce tests in the next version; see
    ;; https://github.com/ewels/MultiQC/issues/376
    (arguments `(#:tests? #f))
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (home-page "http://multiqc.info")
    (license license:gpl3)))
8156
;; chipseq from Bioconductor.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "chipseq" version))
       (sha256
        (base32 "115ayp82rs99iaswrx45skw1i5iacgwzz5k8rzijbp5qic0554n0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (home-page "http://bioconductor.org/packages/chipseq")
    (license license:artistic2.0)))
8181
;; CopyhelpeR, downloaded from the experiment-data section of the
;; Bioconductor release tree.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bioconductor.org/packages/release/"
             "data/experiment/src/contrib/CopyhelpeR_" version ".tar.gz"))
       (sha256
        (base32 "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (build-system r-build-system)
    (properties '((upstream-name . "CopyhelpeR")))
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (home-page "http://bioconductor.org/packages/CopyhelpeR/")
    (license license:gpl2)))
8207
;; CopywriteR from Bioconductor; it propagates r-copyhelper among its
;; dependencies.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "CopywriteR" version))
       (sha256
        (base32 "1bwwnsyk7cpgwkagsnn5mv6fv233b0rkhjvbadrh70h8m4anawfj"))))
    (build-system r-build-system)
    (properties '((upstream-name . "CopywriteR")))
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (license license:gpl2)))
8247
;; sva from Bioconductor.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "sva" version))
       (sha256
        (base32 "1wc1fjm6dzlsqqagm43y57w8jh8nsh0r0m8z1p6ximcb5gxqh7hn"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)))
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (home-page "http://bioconductor.org/packages/sva")
    (license license:artistic2.0)))
8274
;; seqminer from CRAN; zlib is its only declared input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "5.7")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "seqminer" version))
       (sha256
        (base32 "0p75wyl70cvp36mwg5y74nv573j1gdqi15ac2a7xf61jmsq7ycpy"))))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    (home-page "http://seqminer.genomic.codes")
    ;; Any version of the GPL is acceptable.
    (license (list license:gpl2+ license:gpl3+))))
8296
;; RareMETALS2, downloaded from the project's wiki at genome.sph.umich.edu.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://genome.sph.umich.edu/w/images/b/b7/RareMETALS2_"
             version ".tar.gz"))
       (sha256
        (base32 "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (build-system r-build-system)
    (properties '((upstream-name . "RareMETALS2")))
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (license license:gpl3)))
8328
;; MALDIquant from CRAN; it declares no inputs of its own.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.16.2")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "MALDIquant" version))
       (sha256
        (base32 "0z5srzsfgsgi4bssr4chls4ry6d18y2g9143znqmraylppwrrqzr"))))
    (build-system r-build-system)
    (properties '((upstream-name . "MALDIquant")))
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (home-page "http://cran.r-project.org/web/packages/MALDIquant")
    (license license:gpl3+)))
8354
;; ProtGenerics from Bioconductor; it declares no inputs of its own.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ProtGenerics" version))
       (sha256
        (base32 "0hb3vrrvfx6lcfalmjxm8dmigfmi5nba0pzjfgsrzd35c8mbfc6f"))))
    (build-system r-build-system)
    (properties '((upstream-name . "ProtGenerics")))
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (home-page "https://github.com/lgatto/ProtGenerics")
    (license license:artistic2.0)))
8374
;; mzR from Bioconductor.  netcdf is a plain input; the R dependencies are
;; propagated.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzR" version))
       (sha256
        (base32 "0ipmhg6l3pf648rdx5g2ha7l5ppd3cja6afxhdw76x8ga3633x0r"))))
    (build-system r-build-system)
    (properties '((upstream-name . "mzR")))
    (inputs `(("netcdf" ,netcdf)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (home-page "https://github.com/sneumann/mzR/")
    (license license:artistic2.0)))
8406
;; affyio from Bioconductor.  zlib is a plain input and r-zlibbioc is
;; propagated.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.44.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affyio" version))
       (sha256
        (base32 "1svsl4mpk06xm505pap913x69ywks99262krag8y4ygpllj7dfyy"))))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs `(("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (home-page "https://github.com/bmbolstad/affyio")
    (license license:lgpl2.0+)))
8430
;; affy from Bioconductor; it propagates r-affyio among its dependencies.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affy" version))
       (sha256
        (base32 "1snq71ligf0wvaxa6zfrl13ydw0zfhspmhdyfk8q3ba3np4cz344"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (home-page "http://bioconductor.org/packages/affy")
    (license license:lgpl2.0+)))
8456
;; vsn from Bioconductor; it propagates r-affy among its dependencies.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.42.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "vsn" version))
       (sha256
        (base32 "0mgl0azys2g90simf8wx6jdwd7gyg3m4pf12n6w6507jixm2cg97"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (home-page "http://bioconductor.org/packages/release/bioc/html/vsn.html")
    (license license:artistic2.0)))
8490
;; mzID from Bioconductor.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzID" version))
       (sha256
        (base32 "1zn896cpfvqp1qmq5c4vcj933hb8rxwb6gkck1wqvr7393rpqy1q"))))
    (build-system r-build-system)
    (properties '((upstream-name . "mzID")))
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (home-page "http://bioconductor.org/packages/mzID")
    (license license:gpl2+)))
8520
;; pcaMethods from Bioconductor.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.66.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "pcaMethods" version))
       (sha256
        (base32 "18mawhxw57pgpn87qha4mwki24gqja7wpqha8q496476vyap11xw"))))
    (build-system r-build-system)
    (properties '((upstream-name . "pcaMethods")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (home-page "https://github.com/hredestig/pcamethods")
    (license license:gpl3+)))
8550
;; MSnbase from Bioconductor.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnbase" version))
       (sha256
        (base32 "0jjjs29dcwsjaxzfqxy98ycpg3rwxzzchkj77my3cjgdc00sm66n"))))
    (build-system r-build-system)
    (properties '((upstream-name . "MSnbase")))
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (home-page "https://github.com/lgatto/MSnbase")
    (license license:artistic2.0)))
8591
;; MSnID from Bioconductor; it propagates r-msnbase among its dependencies.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32 "0fkk3za39cxi0jyxmagmycjdslr2xf6vg3ylz14jyffqi0blw9d5"))))
    (build-system r-build-system)
    (properties '((upstream-name . "MSnID")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (synopsis "Utilities for LC-MSn proteomics identifications")
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimize filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (home-page "http://bioconductor.org/packages/MSnID")
    (license license:artistic2.0)))
8630
(define-public r-seurat
  ;; Upstream only publishes source releases for new x.0 versions; every
  ;; newer version is released as a pre-built binary only.  At the time of
  ;; this writing the latest binary release is 1.4.0.12, which corresponds to
  ;; the commit below.
  (let ((revision "1")
        (commit "fccb77d1452c35ee47e47ebf8e87bddb59f3b08d"))
    (package
      (name "r-seurat")
      (version (string-append "1.4.0.12-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/satijalab/seurat")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "101wq3aqrdmbfi3lqmq4iivk9iwbf10d4z216ss25hf7n9091cyl"))
         ;; Remove the pre-built jar; it is rebuilt from source in the
         ;; 'build-jar' phase.
         (snippet
          '(begin
             (delete-file "inst/java/ModularityOptimizer.jar")
             #t))))
      (build-system r-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'build-jar
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Compile all Java sources below "java" into a scratch
               ;; directory, then pack the classes into the jar that the
               ;; source snippet removed.
               (let ((class-dir "tmp-classes"))
                 (setenv "JAVA_HOME" (assoc-ref inputs "jdk"))
                 (mkdir class-dir)
                 (and (zero? (apply system* "javac" "-d" class-dir
                                    (find-files "java" "\\.java$")))
                      (zero? (system* "jar"
                                      "-cf" "inst/java/ModularityOptimizer.jar"
                                      "-C" class-dir ".")))))))))
      (native-inputs
       `(("jdk" ,icedtea "jdk")))
      (propagated-inputs
       `(("r-ape" ,r-ape)
         ("r-caret" ,r-caret)
         ("r-cowplot" ,r-cowplot)
         ("r-dplyr" ,r-dplyr)
         ("r-fastica" ,r-fastica)
         ("r-fnn" ,r-fnn)
         ("r-fpc" ,r-fpc)
         ("r-gdata" ,r-gdata)
         ("r-ggplot2" ,r-ggplot2)
         ("r-gplots" ,r-gplots)
         ("r-gridextra" ,r-gridextra)
         ("r-igraph" ,r-igraph)
         ("r-irlba" ,r-irlba)
         ("r-lars" ,r-lars)
         ("r-mixtools" ,r-mixtools)
         ("r-pbapply" ,r-pbapply)
         ("r-plyr" ,r-plyr)
         ("r-ranger" ,r-ranger)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-rcpp" ,r-rcpp)
         ("r-rcppeigen" ,r-rcppeigen)
         ("r-rcppprogress" ,r-rcppprogress)
         ("r-reshape2" ,r-reshape2)
         ("r-rocr" ,r-rocr)
         ("r-rtsne" ,r-rtsne)
         ("r-stringr" ,r-stringr)
         ("r-tclust" ,r-tclust)
         ("r-tsne" ,r-tsne)
         ("r-vgam" ,r-vgam)))
      (synopsis "Seurat is an R toolkit for single cell genomics")
      (description
       "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
      (home-page "http://www.satijalab.org/seurat")
      (license license:gpl3))))
8709
(define htslib-for-sambamba
  ;; Non-exported htslib variant built from a pinned commit of the
  ;; lomereiter/htslib repository.  It inherits everything from `htslib'
  ;; except the fields overridden below, and serves as the "htslib" input
  ;; of the sambamba package.
  (let* ((fork-commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5")
         (short-commit (string-take fork-commit 9)))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lomereiter/htslib.git")
               (commit fork-commit)))
         (file-name (string-append "htslib-" version "-checkout"))
         (sha256
          (base32
           "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      (arguments
       ;; Keep htslib's build arguments, extending only its phases: run
       ;; autoreconf before the 'configure phase.
       (substitute-keyword-arguments (package-arguments htslib)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             (add-before 'configure 'bootstrap
               (lambda _
                 (zero? (system* "autoreconf" "-vif"))))))))
      (native-inputs
       ;; autoconf and automake are added for the 'bootstrap phase, on top of
       ;; whatever native inputs htslib itself declares.
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
8737
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lomereiter/sambamba/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17076gijd65a3f07zns2gvbgahiz5lriwsa6dq353ss3jl85d8vy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:make-flags
       '("D_COMPILER=ldc2"
         ;; Override "--compiler" flag only.
         "D_FLAGS=--compiler=ldc2 -IBioD -g -d"
         "sambamba-ldmd2-64")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure step; the build is driven by the Makefile
         ;; and the make flags above.
         (delete 'configure)
         ;; Copy the BioD checkout into ./BioD, the directory named by the
         ;; "-IBioD" entry in D_FLAGS.
         (add-after 'unpack 'place-biod
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             #t))
         ;; Strip the " htslib-static lz4-static" text from the Makefile;
         ;; htslib and lz4 are supplied as package inputs instead.
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             (substitute* "Makefile"
               ((" htslib-static lz4-static") ""))
             #t))
         ;; Install the single built executable into <out>/bin.
         ;; `install-file' creates the target directory when needed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "build/sambamba"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ;; BioD sources, fetched from a pinned commit and copied into the
       ;; build tree by the 'place-biod phase.
       ("biod"
        ,(let ((biod-commit "1248586b54af4bd4dfb28ebfebfc6bf012e7a587"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit biod-commit)))
             (file-name (string-append "biod-"
                                       (string-take biod-commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1m8hi1n7x0ri4l6s9i0x6jg4z4v94xrfdzp7mbizdipfag0m17g3")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "http://lomereiter.github.io/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description
     "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))