gnu: Add proteinortho.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
CommitLineData
4e10a221 1;;; GNU Guix --- Functional package management for GNU
0047d26a 2;;; Copyright © 2014, 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
9b9b7ffd 3;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
8921841d 4;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
a5002ae7 5;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
a0a71439 6;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
ddb83129 7;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
318c0aee 8;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
4e10a221
RW
9;;;
10;;; This file is part of GNU Guix.
11;;;
12;;; GNU Guix is free software; you can redistribute it and/or modify it
13;;; under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 3 of the License, or (at
15;;; your option) any later version.
16;;;
17;;; GNU Guix is distributed in the hope that it will be useful, but
18;;; WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
24
25(define-module (gnu packages bioinformatics)
26 #:use-module ((guix licenses) #:prefix license:)
27 #:use-module (guix packages)
8e913213 28 #:use-module (guix utils)
4e10a221 29 #:use-module (guix download)
2c16316e 30 #:use-module (guix git-download)
ec946638 31 #:use-module (guix hg-download)
10b4a969 32 #:use-module (guix build-system ant)
4e10a221 33 #:use-module (guix build-system gnu)
d7678942 34 #:use-module (guix build-system cmake)
365c8153 35 #:use-module (guix build-system perl)
8622a072 36 #:use-module (guix build-system python)
a5002ae7 37 #:use-module (guix build-system r)
9c38b540 38 #:use-module (guix build-system ruby)
d3517eda 39 #:use-module (guix build-system trivial)
4e10a221 40 #:use-module (gnu packages)
a2950fa4 41 #:use-module (gnu packages autotools)
684bf7c7 42 #:use-module (gnu packages algebra)
d3517eda 43 #:use-module (gnu packages base)
318c0aee 44 #:use-module (gnu packages bash)
a0a71439 45 #:use-module (gnu packages bison)
e4e5a4d8 46 #:use-module (gnu packages boost)
4e10a221 47 #:use-module (gnu packages compression)
82c370de 48 #:use-module (gnu packages cpio)
1baee943 49 #:use-module (gnu packages curl)
99828fa7 50 #:use-module (gnu packages documentation)
d29150b5 51 #:use-module (gnu packages datastructures)
75dd2424 52 #:use-module (gnu packages file)
02f35bb5 53 #:use-module (gnu packages gawk)
2409f37f 54 #:use-module (gnu packages gcc)
66e40e00 55 #:use-module (gnu packages gd)
97b9da68 56 #:use-module (gnu packages gtk)
b16728b0 57 #:use-module (gnu packages glib)
db7a3444 58 #:use-module (gnu packages groff)
50937297 59 #:use-module (gnu packages guile)
89984be4 60 #:use-module (gnu packages haskell)
66e40e00 61 #:use-module (gnu packages image)
97b9da68 62 #:use-module (gnu packages imagemagick)
15a3c3d4 63 #:use-module (gnu packages java)
51c64999 64 #:use-module (gnu packages linux)
ec946638 65 #:use-module (gnu packages logging)
36742f43 66 #:use-module (gnu packages machine-learning)
db7a3444 67 #:use-module (gnu packages man)
c833ab55 68 #:use-module (gnu packages maths)
6c2b26e2 69 #:use-module (gnu packages mpi)
4e10a221 70 #:use-module (gnu packages ncurses)
81f3e0c1 71 #:use-module (gnu packages pcre)
ceb62d54 72 #:use-module (gnu packages parallel)
66e40e00 73 #:use-module (gnu packages pdf)
4e10a221
RW
74 #:use-module (gnu packages perl)
75 #:use-module (gnu packages pkg-config)
bfe3c685 76 #:use-module (gnu packages popt)
e4e5a4d8 77 #:use-module (gnu packages protobuf)
346a829a 78 #:use-module (gnu packages python)
ec946638 79 #:use-module (gnu packages readline)
9c38b540 80 #:use-module (gnu packages ruby)
84be3b99 81 #:use-module (gnu packages serialization)
c833ab55 82 #:use-module (gnu packages statistics)
d7678942 83 #:use-module (gnu packages tbb)
97b9da68 84 #:use-module (gnu packages tex)
db7a3444 85 #:use-module (gnu packages texinfo)
2127cedb 86 #:use-module (gnu packages textutils)
43c565d2 87 #:use-module (gnu packages time)
a2950fa4 88 #:use-module (gnu packages tls)
ce7155d5 89 #:use-module (gnu packages vim)
365c8153 90 #:use-module (gnu packages web)
c833ab55 91 #:use-module (gnu packages xml)
66e40e00 92 #:use-module (gnu packages xorg)
f7283db3
RW
93 #:use-module (gnu packages zip)
94 #:use-module (srfi srfi-1))
4e10a221 95
8dc797fa
BW
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.37")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "1c7zxk0h8wsjbix82fmmfyywcq6zn3q9h5y67kcl5y3qal2bv2xr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; Skip 'configure'; the single versioned source file is compiled
         ;; directly in the 'build' phase below.
         (delete 'configure)
         (replace 'build
           (lambda _
             (let ((source-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc"
                               "-O3"
                               "-ffast-math"
                               "-finline-functions"
                               "-o"
                               "aragorn"
                               source-file)))))
         ;; Install the executable and its manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (install-file "aragorn" (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1"))
               #t))))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
143
a12ba6e8
BW
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source
     (origin
       (method url-fetch)
       ;; BamM is not available on pypi.
       (uri (string-append "https://github.com/Ecogenomics/BamM/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
       (modules '((guix build utils)))
       (snippet
        `(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "c/htslib-1.3.1")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((shell (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") shell))
                 (setenv "CONFIG_SHELL" shell)
                 (substitute* "configure" (("/bin/sh") shell))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         ;; Wrap the installed 'bamm' script so that the build-time PATH is
         ;; prefixed to PATH when it runs.
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Three characters taken from the last five characters
                    ;; of the "python" input's store file name.
                    (python-version
                     (string-take (string-take-right
                                   (assoc-ref inputs "python") 5) 3)))
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               (setenv "PYTHONPATH"
                       (string-append out
                                      "/lib/python"
                                      python-version
                                      "/site-packages:"
                                      (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (zero? (system* "nosetests"))))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htslib" ,htslib)
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
236
9794180d
RW
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Pass an -rpath for <out>/lib/bamtools to the linker through
             ;; LDFLAGS.
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' returns an unspecified value; report success
             ;; explicitly, as the other phases in this file do.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
269
bdc7be59
MB
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/bcftools/releases/download/"
                    version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "095ry68vmz9q5s1scjsa698dhgyvgw5aicz24c19iwfbai07mhqj"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "htslib-1.3.1")
                  ;; Return an explicit success value rather than the
                  ;; unspecified result of 'delete-file-recursively'.
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       ;; Point the Makefile at the htslib input for headers, the static
       ;; library and the bgzip/tabix programs.
       #:make-flags
       (list
        "USE_GPL=1"
        (string-append "prefix=" (assoc-ref %outputs "out"))
        (string-append "HTSDIR=" (assoc-ref %build-inputs "htslib") "/include")
        (string-append "HTSLIB=" (assoc-ref %build-inputs "htslib") "/lib/libhts.a")
        (string-append "BGZIP=" (assoc-ref %build-inputs "htslib") "/bin/bgzip")
        (string-append "TABIX=" (assoc-ref %build-inputs "htslib") "/bin/tabix"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-Makefile
           (lambda _
             (substitute* "Makefile"
               ;; Do not attempt to build htslib.
               (("^include \\$\\(HTSDIR\\)/htslib\\.mk") "")
               ;; Link against GSL cblas.
               (("-lcblas") "-lgslcblas"))
             #t))
         (delete 'configure)
         (add-before 'check 'patch-tests
           (lambda _
             ;; Replace the hard-coded /bin/bash with the bash found on PATH.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
327
8dd4ff11
RW
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; phase fails if any of the three extractions fails.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t))))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
385
81de5647
RW
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy every file found under the build tree's "bin" directory
         ;; into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "bin" ".*")))
             #t)))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
424
9a8f309c
RW
;; Ribotaper expects the column layout of this older release; later releases
;; of bedtools produce files with more columns.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
439
17dc32a4
RW
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    ;; Built against the pinned bedtools 2.18 and samtools 0.1 variants
    ;; rather than the current releases.
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r" ,r)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
469
a0a71439
RJ
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (mkdir-p man)
               ;; The manual page is installed under the name "bioawk.1".
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin)
               ;; 'install-file' returns an unspecified value; report success
               ;; explicitly, as the other install phases in this file do.
               #t))))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
510
a2fb1492
RW
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Let 'pypi-uri' build the PyPI URL, as done for
              ;; python-biopython.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    ;; bedtools and samtools are propagated so that they are installed
    ;; alongside this package.
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
543
9e12eba8
BW
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.5")
    (source
     (origin
       (method url-fetch)
       ;; The PyPI distribution lacks the test data, so fetch from GitHub
       ;; instead: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1n25w3p1rixbpac8iysmzcja6m4ip5r6sz19l8y6wlwi49hxn278"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The Python 2 variant is defined separately as python2-biom-format.
    (properties `((python2-variant . ,(delay python2-biom-format))))))
575
(define-public python2-biom-format
  ;; Python 2 variant of python-biom-format: adds a phase that patches
  ;; setup.py and adds python2-setuptools to the native inputs.
  (let* ((stripped (strip-python2-variant python-biom-format))
         (base     (package-with-python2 stripped)))
    (package
      (inherit base)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ,@(package-arguments base)))
      (native-inputs
       `(("python2-setuptools" ,python2-setuptools)
         ,@(package-native-inputs base))))))
592
f7283db3
RW
(define-public bioperl-minimal
  (let* ((inputs `(("perl-module-build" ,perl-module-build)
                   ("perl-data-stag" ,perl-data-stag)
                   ("perl-libwww" ,perl-libwww)
                   ("perl-uri" ,perl-uri)))
         ;; Names of all packages among the transitive target inputs of the
         ;; inputs above, without duplicates.
         (transitive-inputs
          (map (compose package-name cadr)
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs cadr)
                            inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; The archive name uses the version with dots replaced by dashes.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (c)
                                           (if (char=? c #\.)
                                               #\- c)) version)
                             ".tar.gz"))
         (sha256
          (base32 "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/"))
                      (directories
                       (cons (string-append out "/lib/perl5/site_perl")
                             (map (lambda (label)
                                    (assoc-ref %build-inputs label))
                                  ',transitive-inputs)))
                      (path (string-join directories ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,path))))
                           (find-files bin "\\.pl$"))
                 #t))))))
      (inputs inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
657
85c37e29
RW
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.68")
    (source (origin
              (method url-fetch)
              ;; use PyPi rather than biopython.org to ease updating
              (uri (pypi-uri "biopython" version))
              (sha256
               (base32
                "07qc7nz0k77y8hf8s18rscvibvm91zw0kkq7ylrhisf8vp8hkp6i"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-home
           ;; Some tests require a home directory to be set.
           (lambda _ (setenv "HOME" "/tmp") #t)))))
    ;; numpy is propagated, as in python-biom-format, so that it is
    ;; installed alongside the library for users that import it.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))
    (properties `((python2-variant . ,(delay python2-biopython))))))
85c37e29
RW
689
(define-public python2-biopython
  ;; Python 2 variant of python-biopython, with python2-setuptools added to
  ;; the native inputs.
  (let* ((stripped (strip-python2-variant python-biopython))
         (base     (package-with-python2 stripped)))
    (package
      (inherit base)
      (native-inputs
       `(("python2-setuptools" ,python2-setuptools)
         ,@(package-native-inputs base))))))
85c37e29 696
4b1a1528
BW
;; seqmagick requires an outdated version of biopython, see
;; https://github.com/fhcrc/seqmagick/issues/59
;; This package should be removed once that issue has been resolved.
(define python2-biopython-1.66
  (package
    (inherit python2-biopython)
    (version "1.66")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
710
985d8411
BW
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-core")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of the
      ;; bpp packages with GCC 5.
      (inputs
       `(("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
743
8b5f4d57
BW
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-phyl")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ;; GCC 4.8 fails due to an 'internal compiler error', so we use a more
         ;; modern GCC.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
780
159a7016
BW
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-popgen")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
813
70f1bc05
BW
(define-public bpp-seq
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-seq")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("gcc" ,gcc-5)))               ; Use GCC 5 as per 'bpp-core'.
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
847
db7a3444
BW
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from
  ;; source is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Every input needs its own label: a lookup by label returns only the
       ;; first match, so a repeated label hides the later entry.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
886
82c370de
RW
(define-public blast+
  (package
    (name "blast+")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32 "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled bzip2 and zlib as well as the useless msbuild
           ;; directory.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/build-system/project_tree_builder/msbuild"))
           ;; Stop the compress Makefile from descending into the
           ;; directories removed above.
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are three(!) tests for this massive library, and all fail with
       ;; "unparsable timing stats".
       ;; ERR [127] -- [util/regexp] test_pcre.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the bare command name CMD,
             ;; or #f (after printing a warning) when 'which' cannot find it.
             (define (tool-path cmd)
               (if (string=? cmd "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Rewrite hardcoded paths to various tools; a match whose
             ;; command cannot be resolved is left untouched.
             (substitute*
                 (append '("src/build-system/configure.ac"
                           "src/build-system/configure"
                           "scripts/common/impl/if_diff.sh"
                           "scripts/common/impl/run_with_lock.sh"
                           "src/build-system/Makefile.configurables.real"
                           "src/build-system/Makefile.in.top"
                           "src/build-system/Makefile.meta.gmake=no"
                           "src/build-system/Makefile.meta.in"
                           "src/build-system/Makefile.meta_l"
                           "src/build-system/Makefile.meta_p"
                           "src/build-system/Makefile.meta_r"
                           "src/build-system/Makefile.mk.in"
                           "src/build-system/Makefile.requirements"
                           "src/build-system/Makefile.rules_with_autodep.in")
                         (find-files "scripts/common/check" "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                (or (tool-path cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (lib     (string-append (assoc-ref outputs "lib") "/lib"))
                    (include (string-append (assoc-ref outputs "include")
                                            "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" out)
                           (string-append "--libdir=" lib)
                           (string-append "--includedir=" include)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           ;; Each library is built twice by default, once
                           ;; with "-static" in its name, and again
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', hence the hand-built flag list.
               (zero? (apply system* "./configure.orig" flags))))))))
    (outputs '("out"                    ; 19 MB
               "lib"                    ; 203 MB
               "include"))              ; 32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1033
6c2b26e2
RW
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib,
                  ;; and the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz sources are deleted by the source snippet, so
         ;; the Makefile must not try to build them.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code the locations of the helper executables: kmc is
         ;; installed into this package's own output, pigz comes from the
         ;; "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Install the two executables by hand instead of running the
         ;; standard install phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less memory
compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1118
2c7ee167
RW
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.9")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vp5db8i7is57iwjybcdg18f5ivyzlj5g1ix1nlvxainzivhz55g"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is dropped; the build is driven through the
         ;; Makefile and the make flags above.
         (delete 'configure)
         (replace 'check
           (lambda _
             ;; 'system*' returns the exit status, and any integer is a true
             ;; value in Scheme, so the status must be tested explicitly or
             ;; a failing test run would be reported as success.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1172
94ce537e
RW
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32 "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
       (patches (search-patches "tophat-build-with-later-seqan.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (for-each delete-file-recursively
                     '("src/SeqAn-1.3"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Strip every reference to the bundled samtools from the
             ;; generated Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ lhs) lhs)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Replace the name of the bundled executable with the samtools
             ;; program found by 'which'.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the samtools headers through the "samtools/" prefix,
             ;; both for quoted and for angle-bracket includes.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1239
9a8336d8
RW
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the executable, the README and the man page by hand
         ;; instead of running the standard install phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               ;; Report success explicitly, like the other phases in this
               ;; file, rather than returning an unspecified value.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1286
d29150b5
RW
(define-public bwa-pssm
  ;; Inherits from 'bwa'; only the fields listed below are overridden.
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1315
ad641d53
RW
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32 "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; Drop the import and the call that pull in the outdated
        ;; "distribute" module.
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:tests? #f))       ;tests fail because test data are not included
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1350
55a9a8c2
RW
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.9.1.4")
    (source (origin
              (method url-fetch)
              ;; Test data is missing on PyPi.
              (uri (string-append
                    "https://github.com/pysam-developers/pysam/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0y41ssbg6nvn2jgcbnrvkzblpjcwszaiv1rgyd8dwzjkrbfsgsmc"))
              (modules '((guix build utils)))
              (snippet
               ;; Drop bundled htslib. TODO: Also remove samtools and bcftools.
               '(delete-file-recursively "htslib"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Point the build at the htslib input instead of the bundled copy
         ;; removed by the source snippet.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "HTSLIB_MODE" "external")
             (setenv "HTSLIB_LIBRARY_DIR"
                     (string-append (assoc-ref inputs "htslib") "/lib"))
             (setenv "HTSLIB_INCLUDE_DIR"
                     (string-append (assoc-ref inputs "htslib") "/include"))
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             #t))
         ;; Run the tests after 'install' so that they exercise the
         ;; installed module.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Derive "MAJOR.MINOR" from the name of the "python" input
             ;; instead of slicing a fixed number of characters, which only
             ;; works for five-character versions (it breaks on, say,
             ;; "2.7.12").
             ;; NOTE(review): assumes the store file name ends in
             ;; "-MAJOR.MINOR.PATCH" -- confirm for the Python packages in use.
             (let* ((python  (assoc-ref inputs "python"))
                    (version (substring python
                                        (+ 1 (string-rindex python #\-))))
                    (major+minor (substring version 0
                                            (string-rindex version #\.))))
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (assoc-ref outputs "out")
                        "/lib/python" major+minor
                        "/site-packages")))
             ;; Step out of source dir so python does not import from CWD.
             (chdir "tests")
             (setenv "HOME" "/tmp")
             ;; Build the test data first, then run the suite; the phase
             ;; fails as soon as one of the commands fails.
             (and (zero? (system* "make" "-C" "pysam_data"))
                  (zero? (system* "make" "-C" "cbcf_data"))
                  (zero? (system* "nosetests" "-v"))))))))
    (propagated-inputs
     `(("htslib" ,htslib)))    ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-setuptools" ,python-setuptools)
       ;; Dependencies below are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1419
;; Python 2 variant, derived mechanically from 'python-pysam'.
(define-public python2-pysam (package-with-python2 python-pysam))
1422
4db9433a
RW
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "twobitreader" version))
       (sha256
        (base32 "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
    ;; The Python 2 variant is defined separately and referenced lazily here.
    (properties `((python2-variant . ,(delay python2-twobitreader))))
    (build-system python-build-system)
    (arguments
     ;; Tests are not distributed in the PyPi release.
     ;; TODO Try building from the Git repo or asking the upstream maintainer
     ;; to distribute the tests on PyPi.
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
1448
(define-public python2-twobitreader
  ;; Python 2 variant of 'python-twobitreader' with python2-setuptools
  ;; prepended to the native inputs.
  (let ((py2 (package-with-python2
              (strip-python2-variant python-twobitreader))))
    (package
      (inherit py2)
      (native-inputs
       (cons `("python2-setuptools" ,python2-setuptools)
             (package-native-inputs py2))))))
1455
f94bf198
RW
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "1sqkz5d3b9kf688mp7k771c87ins42j7j0whmkb49cb3fsg8s8lj"))))
    ;; The Python 2 variant is defined separately and referenced lazily here.
    (properties `((python2-variant . ,(delay python2-plastid))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f))                    ; Some test files are not included.
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
1489
(define-public python2-plastid
  ;; Python 2 variant of 'python-plastid'.
  (let ((py2 (package-with-python2
              (strip-python2-variant python-plastid))))
    (package
      (inherit py2)
      ;; setuptools is required at runtime
      (propagated-inputs
       (cons `("python2-setuptools" ,python2-setuptools)
             (package-propagated-inputs py2))))))
f94bf198 1497
6c1305f9
RW
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version "-2016-0304.tar.gz"))
       (sha256
        (base32 "15db0hq38yyifwqx9b6l34z14jcq576dmjavhj8a426c18lvnhp3"))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)))
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; Executables are copied directly to the PREFIX.
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism: the build date and time that
         ;; would otherwise be embedded in the programs.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               #t))))))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
1545
810cff85
RW
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove unnecessary setup dependency
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           ;; Delete the shared objects shipped in the source tree and the
           ;; "dist/" directory.
           (delete-file "clipper/src/peaks.so")
           (delete-file "clipper/src/readsToWiggle.so")
           (delete-file-recursively "dist/")
           #t))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)    ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
1591
6a35566d
RS
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the documentation, the QuarryFiles data directory and the
         ;; two programs into the output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Report success explicitly, like the other phases in this
               ;; file, rather than returning an unspecified value.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    ;; QUARRY_PATH is searched for the "QuarryFiles" directory that the
    ;; install phase places at the top of the output.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
1631
36742f43
RW
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; The configure and build phases are dropped; the files are
         ;; copied into place by the install phase below.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "src" (string-append out "/src"))
               (mkdir bin)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (copy-file "couger" (string-append bin "/couger")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((launcher (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                   (path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 (list "PYTHONPATH" ":" 'prefix (list path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
1701
bfe3c685
RW
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    ;; Built with the default phases of the GNU build system.
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (home-page "http://www.clustal.org/omega/")
    (license license:gpl2+)))
1725
191c7101
RW
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
              ;; This patch has been sent upstream already and is available
              ;; for download from Sourceforge, but it has not been merged.
              (patches (search-patches "crossmap-allow-system-pysam.patch"))
              (modules '((guix build utils)))
              ;; Remove the bundled copy of pysam; the packaged one from the
              ;; inputs is used instead.
              (snippet
               '(begin
                  (delete-file-recursively "lib/pysam")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-env
           (lambda _
             ;; Set the variable that the patched build reads to select the
             ;; system-provided pysam.
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             ;; Signal success explicitly rather than relying on the
             ;; return value of 'setenv'.
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1767
3a40a92c
RW
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32 "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-bam="
                            (assoc-ref %build-inputs "samtools")))
       #:make-flags
       (list
        ;; Eigen's headers live in the "include/eigen3" subdirectory.
        (string-append "EIGEN_CPPFLAGS=-I"
                       (assoc-ref %build-inputs "eigen")
                       "/include/eigen3/")
        ;; Cufflinks has to be linked with several boost libraries.
        "LDFLAGS=-lboost_system -lboost_serialization -lboost_thread")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             ;; Rewrite the "bam/" header references to "samtools/" ones.
             (substitute* '("ax_bam.m4" "configure" "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (license license:boost1.0)))
1825
8e913213
RW
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests must be run after install, so drop the standard
         ;; 'check phase and add a new one after 'install.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python (assoc-ref inputs "python"))
                    ;; Everything after the last hyphen of the input's file
                    ;; name is taken to be the full version, e.g. "3.4.3".
                    (full-version
                     (substring python (+ 1 (string-rindex python #\-))))
                    ;; Keep the first two components, e.g. "3.4".  Unlike
                    ;; fixed-width string slicing this also works when a
                    ;; component has more than one digit.
                    (major+minor
                     (string-join
                      (list-head (string-split full-version #\.) 2)
                      ".")))
               ;; Let the tests find the freshly installed modules.
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (assoc-ref outputs "out")
                                      "/lib/python" major+minor
                                      "/site-packages"))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1865
1baee943
RW
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure step; the Makefile is used directly.
         (delete 'configure)
         (add-before 'check 'disable-curl-test
           (lambda _
             ;; Drop the "testRemote" invocation from the Makefile.
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
         ;; there has not yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (subdir)
                           (mkdir-p (string-append prefix subdir)))
                         '("/lib" "/include"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (home-page "https://github.com/dpryan79/libBigWig")
    (license license:expat)))
1911
69e0e03c
RW
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
       (modules '((guix build utils)))
       ;; Delete the bundled libBigWig sources.
       (snippet
        '(begin
           (delete-file-recursively "libBigWig")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             ;; Put "BigWig" at the head of the library list in setup.py.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (home-page "https://github.com/dpryan79/pyBigWig")
    (license license:expat)))
1946
;; Python 2 variant of 'python-pybigwig', with setuptools as the only
;; native input.
(define-public python2-pybigwig
  (let ((py2-variant (package-with-python2 python-pybigwig)))
    (package
      (inherit py2-variant)
      (native-inputs
       `(("python-setuptools" ,python2-setuptools))))))
1952
ec2a67de
BW
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "DendroPy" version))
       (sha256
        (base32 "1jfz7gp18wph311w1yygbvjanb3n5mdqal439bb6myw41dwb5m63"))
       ;; There are two known test failures that will be fixed in the next
       ;; release after 4.1.0.
       ;; https://github.com/jeetsukumaran/DendroPy/issues/48
       (patches
        (search-patches "python-dendropy-exclude-failing-tests.patch"))))
    (build-system python-build-system)
    ;; The Python 2 variant is defined separately as 'python2-dendropy'.
    (properties `((python2-variant . ,(delay python2-dendropy))))
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (home-page "http://packages.python.org/DendroPy/")
    (license license:bsd-3)))
1978
(define-public python2-dendropy
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-dendropy))))
    (package
      (inherit py2-base)
      ;; Do not use same source as 'python-dendropy' because the patched
      ;; failing tests do not occur on Python 2.
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "DendroPy" (package-version py2-base)))
         (sha256
          (base32 "1jfz7gp18wph311w1yygbvjanb3n5mdqal439bb6myw41dwb5m63"))))
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; There is currently a test failure that only happens on some
           ;; systems, and only using "setup.py test", so the tests are run
           ;; with nosetests instead.
           (replace 'check
             (lambda _
               (zero? (system* "nosetests")))))))
      (native-inputs
       `(("python2-setuptools" ,python2-setuptools)
         ("python2-nose" ,python2-nose)
         ,@(package-native-inputs py2-base))))))
2003
2004
1921b1de
RW
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1nmfin0zjdby3vay3r4flvz94dr6qjhj41ax4yz3vx13j6wz8izd"))))
    (build-system python-build-system)
    ;; Built with Python 2; all Python inputs are the python2 variants.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-numpydoc" ,python2-numpydoc)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)
       ("python-pybigwig" ,python2-pybigwig)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)    ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (home-page "https://github.com/fidelram/deepTools")
    (license license:gpl3+)))
2044
684bf7c7
BW
(define-public diamond
  (package
    (name "diamond")
    (version "0.8.27")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0g0zdyfnri9v7nfbh8f7zqs4af1xydqkiw8m0cx4jc2ql4chpf6a"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             ;; Strip the "-march=native" flag from the CMake build file.
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    ;; diamond fails to build on other platforms
    ;; https://github.com/bbuchfink/diamond/issues/18
    (supported-systems '("x86_64-linux"))
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (home-page "https://github.com/bbuchfink/diamond")
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
2082
97b9da68
RW
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maaskola/discrover/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    ;; There are no tests.
    (arguments
     `(#:tests? #f))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (license license:gpl3+)))
2109
6619f9c7
RW
(define-public eigensoft
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      ;; Base version, packaging revision, and the first nine characters of
      ;; the commit identifier.
      (version (string-append "6.1.2-" revision "." (substring commit 0 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32 "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Remove pre-built binaries and leave an empty "bin" directory.
         (snippet
          '(begin
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags '("CC=gcc")
         #:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program target))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
2177
365c8153
RW
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source (origin
              (method url-fetch)
              ;; NOTE: the URL names a dated snapshot directory and does not
              ;; depend on VERSION, so both must be updated together.
              (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                                  "versions/2016-05-03/edirect.tar.gz"))
              (sha256
               (base32
                "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Only the "edirect.pl" script is installed; nothing is
         ;; configured or built.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               ;; 'install-file' creates TARGET if needed.
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path)))
               ;; Signal success explicitly rather than relying on the
               ;; return value of 'wrap-program'.
               #t))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2243
b16728b0
BW
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description previously read "using a many alignment models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2273
e4e5a4d8
RW
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri
               (string-append
                "http://bio.math.berkeley.edu/eXpress/downloads/express-"
                version "/express-" version "-src.tgz"))
              (sha256
               (base32
                "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 ;; Link against the shared boost libraries.
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 ;; Use the headers of the "bamtools" input instead of the
                 ;; path inside the source tree.
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; Likewise for the bamtools library directory.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2318
f3674b1c
BW
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The build and the check run inside the "source" directory; the
         ;; install phase runs from the top-level directory again.
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               ;; 'install-file' creates TARGET and keeps each base name.
               (for-each (lambda (file)
                           (install-file file target))
                         '("scripts/convertToEBD.py"
                           "bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (license license:gpl3+)))
2363
12b04cbe
BW
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.9")
    ;; The source is a single C file rather than an archive.
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.microbesonline.org/fasttree/FastTree-"
                           version ".c"))
       (sha256
        (base32 "0ljvvw8i1als1wbfzvrf15c3ii2vw9db20a259g6pzg34xyyb97k"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into the executable TARGET, with EXTRA-FLAGS
             ;; placed ahead of the flags shared by both builds.
             (define (compile target . extra-flags)
               (zero? (apply system* "gcc"
                             (append extra-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o" target
                                           source
                                           "-lm")))))
             ;; Build the plain binary first, then the OpenMP one.
             (and (compile "FastTree")
                  (compile "FastTreeMP" "-DOPENMP" "-fopenmp"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append target "/" program)))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (home-page "http://www.microbesonline.org/fasttree")
    (license license:gpl2+)))
2422
2127cedb
RW
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    ;; Built with the default phases of the GNU build system.
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (license license:agpl3+)))
2454
d7678942
RW
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DFLEXBAR_BINARY_DIR="
                            (assoc-ref %outputs "out")
                            "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the output's "bin" directory first on PATH for the
             ;; validation script.
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The source directory name embeds the version; derive it
             ;; from VERSION so that an update does not break this phase.
             (chdir ,(string-append "../flexbar_v" version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         ;; NOTE(review): presumably FLEXBAR_BINARY_DIR above already makes
         ;; the build place the executable in the output, which is why the
         ;; 'install phase is removed -- confirm.
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2499
19f4554c
BW
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; No trailing slash; separators are added where the
                    ;; path is used, so that no "//" ends up in the files.
                    (share (string-append out "/share/fraggenescan")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to "rm" and "mv" by their absolute file names.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "/train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 ;; Accept any amount of leading spaces and keep it, so the
                 ;; match does not depend on the exact indentation.
                 (("^( *)strcat\\(train_dir, \\\"train/\\\"\\);" _ indent)
                  (string-append indent "strcpy(train_dir, \""
                                 share "/train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" share "/\";"))))
             #t))
         (replace 'build
           (lambda _ (and (zero? (system* "make" "clean"))
                          (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (install-file "FGS_gff.py" bin)
               (install-file "post_process.pl" bin)
               (copy-recursively "train" share)
               ;; Signal success explicitly rather than relying on the
               ;; return value of 'copy-recursively'.
               #t)))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check
           ;; that the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (and (zero? (system* (string-append bin "run_FragGeneScan.pl")
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2580
81f3e0c1
BW
(define-public fxtract
  ;; UTIL-COMMIT pins the revision of the 'util' helper sources that are
  ;; copied into the fxtract source tree before building.
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; The archive URL ends in a bare "<version>.tar.gz", so name the
         ;; download after the package.  It must not reuse the name of the
         ;; 'util' checkout below: this origin is the fxtract tarball.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             ;; Replace the "util" directory of the tarball with the
             ;; separately fetched 'ctskennerton-util' sources.
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctstennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2647
5854f685
RW
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Remove the pre-generated C files so that they are
             ;; regenerated from the pyx sources.
             (delete-file "grit/sparsify_support_fns.c")
             (delete-file "grit/call_peaks_support_fns.c")
             (let ((numpy (assoc-ref inputs "python-numpy")))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Add the numpy include path to fix compilation.
                 (("pyx\", \\]")
                  (string-append
                   "pyx\", ], include_dirs = ['"
                   numpy
                   "/lib/python2.7/site-packages/numpy/core/include/"
                   "']"))))
             #t)))))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2698
346a829a
RW
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Copy every hisat executable found in the build tree into
           ;; the bin/ directory of the output.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs
                    (find-files
                     "."
                     "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bin))
                         programs))))
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env"))))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2764
c684629f
BW
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is named after the major version: the
       ;; first component of VERSION is appended directly to "hmmer".
       (uri (string-append "http://eddylab.org/software/hmmer"
                           (version-prefix version 1) "/"
                           version "/hmmer-" version ".tar.gz"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
2793
85652f59
RW
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments
     `(#:python ,python-2))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (inputs
     `(("python-pysam" ,python2-pysam)))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
2821
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "1.129")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/htsjdk/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
       (modules '((guix build utils)))
       ;; Remove the build dependency on git.
       (snippet
        '(substitute* "build.xml"
           (("failifexecutionfails=\"true\"")
            "failifexecutionfails=\"false\"")))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ;tests require Internet access
       #:build-target "all"
       ;; Point the "dist" property at the output directory.
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also an number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
2859
e7c09730
RW
(define-public htslib
  (package
    (name "htslib")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           ;; The test driver refers to /bin/bash; use the bash found
           ;; in PATH instead.
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
2894
c4325f62
RW
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-program
           ;; Wrap bin/idr so that PYTHONPATH is prefixed with the
           ;; site-packages directories of scipy, numpy and matplotlib.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((idr (string-append (assoc-ref outputs "out")
                                        "/bin/idr"))
                    ;; Characters 1-3 of the last five characters of the
                    ;; python input's file name.
                    (py-version (string-take
                                 (string-take-right
                                  (assoc-ref inputs "python") 5)
                                 3))
                    (site-packages
                     (lambda (input)
                       (string-append (assoc-ref inputs input)
                                      "/lib/python" py-version
                                      "/site-packages")))
                    (path (string-join
                           (map site-packages
                                '("python-scipy"
                                  "python-numpy"
                                  "python-matplotlib"))
                           ":")))
               (wrap-program idr
                 (list "PYTHONPATH" ":" 'prefix (list path))))
             #t)))))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl3+)))
2944
43c565d2
RW
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Each set of language bindings is installed into its own output.
       (let ((ruby-out (assoc-ref %outputs "ruby"))
             (python-out (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby-out)
               (string-append "--enable-python-binding=" python-out)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; From their website: JELLYFISH runs on 64-bit Intel-compatible processors
    (supported-systems '("x86_64-linux"))
    ;; The combined work is published under the GPLv3 or later.  Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
2995
94ff3157
BW
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (sha256
        (base32
         "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
       (patches (search-patches "khmer-use-libraries.patch"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Delete bundled libraries.
             (delete-file-recursively "third-party/zlib")
             (delete-file-recursively "third-party/bzip2")
             ;; Replace bundled seqan with the headers of the seqan input.
             (let ((bundled "third-party/seqan")
                   (headers (string-append (assoc-ref inputs "seqan")
                                           "/include/seqan")))
               (delete-file-recursively bundled)
               (copy-recursively headers
                                 (string-append bundled
                                                "/core/include"
                                                "/seqan")))
             ;; We do not replace the bundled MurmurHash as the canonical
             ;; repository for this code 'SMHasher' is unsuitable for
             ;; providing a library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; It is simpler to test after installation.
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; Run the test suite against the installed files: append the
           ;; output's bin/ and site-packages directories to PATH and
           ;; PYTHONPATH, then run nosetests from the "build" directory.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (py-version (string-take
                                 (string-take-right
                                  (assoc-ref inputs "python") 5)
                                 3))
                    (site-packages (string-append out
                                                  "/lib/python"
                                                  py-version
                                                  "/site-packages")))
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" site-packages))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux"))
    (license license:bsd-3)))
3076
d57e6d0f
RW
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (source
     (origin
       (method url-fetch)
       ;; The project is published on PyPI under the name "MACS2".
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32
         "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ;no test target
       #:python ,python-2))             ;only compatible with Python 2.7
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
3105
41ddebdd
BW
(define-public mafft
  (package
    (name "mafft")
    (version "7.305")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mafft.cbrc.jp/alignment/software/mafft-" version
             "-without-extensions-src.tgz"))
       (file-name (string-append name "-" version ".tgz"))
       (sha256
        (base32
         "0ziim7g58n3z8gppsa713f5fxprl60ldj3xck186z0n9dpp06i8r"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile lives in the "core" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name
               ;; the program that follows it, "f2cl", exactly as the
               ;; replacement does, or nothing is substituted.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; comment out the clean-up rules of the removed programs
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           ;; Refer to perl, awk and grep by the absolute file names
           ;; found in PATH at build time.
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           ;; Wrap every installed file so that the bin/ directory of
           ;; coreutils is prepended to PATH.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref %build-inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
8fd790eb 3191
84be3b99
MB
(define-public mash
  (package
    (name "mash")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marbl/mash/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete bundled kseq.
        ;; TODO: Also delete bundled murmurhash and open bloom filter.
        '(delete-file "src/mash/kseq.h"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags (list "CC=gcc")
       #:configure-flags
       ;; Tell configure where capnproto and gsl are installed.
       (let ((capnproto (assoc-ref %build-inputs "capnproto"))
             (gsl (assoc-ref %build-inputs "gsl")))
         (list (string-append "--with-capnp=" capnproto)
               (string-append "--with-gsl=" gsl)))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           ;; The bundled kseq.h is deleted by the source snippet; include
           ;; the copy under htslib/ instead.
           (lambda _
             (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-before 'configure 'autoconf
           (lambda _
             (zero? (system* "autoconf")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application.  Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3247
(define-public metabat
  ;; We package from a git commit because compilation of the released version
  ;; fails.
  (let ((commit "cbdca756993e66ae57e50a27970595dda9cbde1b"))
    (package
      (name "metabat")
      (version (string-append "0.32.4-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://bitbucket.org/berkeleylab/metabat.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "0byia8nsip6zvc4ha0qkxkxxyjf4x7jcvy48q2dvb0pzr989syzr"))
         (patches (search-patches "metabat-remove-compilation-date.patch"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-includes
             ;; Include the samtools and htslib headers under the
             ;; directory names used by the substitutions below, instead
             ;; of "bam/".
             (lambda _
               (substitute* "src/BamUtils.h"
                 (("^#include \"bam/bam\\.h\"")
                  "#include \"samtools/bam.h\"")
                 (("^#include \"bam/sam\\.h\"")
                  "#include \"samtools/sam.h\""))
               (substitute* "src/KseqReader.h"
                 (("^#include \"bam/kseq\\.h\"")
                  "#include \"htslib/kseq.h\""))
               #t))
           (add-after 'unpack 'fix-scons
             ;; Point the SConstruct variables at the packaged htslib and
             ;; samtools instead of a bundled 'samtools' directory.
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* "SConstruct"
                 ;; Keep the variable name intact: the assignment must
                 ;; redefine 'htslib_dir', not introduce a new variable.
                 (("^htslib_dir = 'samtools'")
                  (string-append "htslib_dir = '"
                                 (assoc-ref inputs "htslib")
                                 "'"))
                 ;; 'samtools_dir' refers to samtools itself, which is a
                 ;; separate input from htslib.
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '"
                                 (assoc-ref inputs "samtools")
                                 "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 (assoc-ref inputs "samtools")
                                 "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
               #t))
           (delete 'configure)
           (replace 'build
             ;; Build with scons; the "install" target is passed as well.
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (mkdir (assoc-ref outputs "out"))
               (zero? (system* "scons"
                               (string-append
                                "PREFIX="
                                (assoc-ref outputs "out"))
                               (string-append
                                "BOOST_ROOT="
                                (assoc-ref inputs "boost"))
                               "install"))))
           ;; Check and install are carried out during build phase.
           (delete 'check)
           (delete 'install))))
      (inputs
       `(("zlib" ,zlib)
         ("perl" ,perl)
         ("samtools" ,samtools)
         ("htslib" ,htslib)
         ("boost" ,boost)))
      (native-inputs
       `(("scons" ,scons)))
      (home-page "https://bitbucket.org/berkeleylab/metabat")
      (synopsis
       "Reconstruction of single genomes from complex microbial communities")
      (description
       "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
      (license (license:non-copyleft "file://license.txt"
                                     "See license.txt in the distribution.")))))
8fd790eb 3333
318c0aee
MB
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ctSkennerton/minced/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                    (launcher (string-append bin "/minced"))
                    (script (string-append
                             "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                             (assoc-ref inputs "jre") "/bin/java -jar "
                             bin "/minced.jar \"$@\"\n")))
               ;; Minced comes with a wrapper script that tries to figure
               ;; out where it is located before running the JAR.  Since
               ;; these paths are known to us, we build our own wrapper to
               ;; avoid coreutils dependency.
               (install-file "minced.jar" bin)
               (call-with-output-file launcher
                 (lambda (port)
                   (display script port)))
               (chmod launcher #o555)))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
3389
ddd82e0e
RW
;; MISO is fetched from PyPI under the name "misopy".  Its setup.py is
;; patched in the origin snippet before the Python 2 build runs.
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Without setuptools the executables are not installed.
           (("distutils.core") "setuptools")
           ;; Force "gcc" for every compiler and linker role instead of
           ;; the default "cc".
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)    ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
3439
324efb88
BW
;; MUSCLE is built with plain "make": no configure script, no test suite
;; and no install target.
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (file-name (string-append name "-" version))
       (uri (string-append
             "http://www.drive5.com/muscle/muscle_src_"
             version ".tar.gz"))
       (sha256
        (base32
         "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; With no tests available, at least make sure the freshly built
           ;; binary starts.
           (lambda _
             (zero? (system* "./muscle" "-version"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
3474
1e44cf8b
BW
;; OrfM uses the standard GNU build phases unchanged; the Ruby packages
;; are build-time (native) inputs only.
(define-public orfm
  (package
    (name "orfm")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "0vb6d771gl4mix8bwx919x5ayy9pkj44n7ki336nz3rz2rx4c7gk"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
3499
19ee9201
RW
;; pbcore is fetched from PyPI and built for Python 2 only.  PyXB is
;; propagated so that it ends up in the profile of pbcore's users.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; pbcore requires Python 2.7
    (native-inputs
     `(("python-docutils" ,python2-docutils)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)
       ("python-sphinx" ,python2-sphinx)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (propagated-inputs
     `(("python-pyxb" ,python2-pyxb)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
3530
c61fe02c
RW
;; WarpedLMM is distributed as a zip archive (hence the "unzip" native
;; input) and supports Python 2 only.
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'remove-bin-directory
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Everything under "bin" is a wrapper for running the module
             ;; tests; nothing there is useful once "check" has run, so
             ;; drop the whole directory from the output.
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (delete-file-recursively bindir))
             #t)))))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)
       ("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
3577
;; pbtranscript-tofu is built from a pinned commit of the cDNA_primer
;; repository; the short commit ID is appended to the version string.
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the bundled Cython source tarball.
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; The default flags make the install phase build a zipped egg,
         ;; which fails with 'ZIP does not support timestamps before
         ;; 1980'.
         #:configure-flags '("--single-version-externally-managed"
                             "--record=pbtranscript-tofu.txt")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory
             (lambda _
               ;; The Python package lives in a subdirectory of the
               ;; checkout.
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               ;; This setup.py hack breaks the build with setuptools 18.0
               ;; and later, so turn its condition into a constant false.
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)
         ("python-setuptools" ,python2-setuptools)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
3637
024130d2
BW
;; PRANK has no configure script.  The build happens inside "src", and the
;; installed binary is wrapped so that it finds its helper programs.
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32
         "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Runs right after 'unpack, while the working directory is still
         ;; the top of the source tree (hence the "src/" prefix).
         (add-after 'unpack 'remove-m64-flag
           ;; Prank picks the right 'bit-ness' without this flag, and
           ;; removing it lets the package build on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (add-after 'remove-m64-flag 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix-dir (assoc-ref outputs "out"))
                    (bindir     (string-append prefix-dir "/bin"))
                    (mandir     (string-append prefix-dir
                                               "/share/man/man1"))
                    ;; Directories of the helper programs, prepended to
                    ;; PATH by the wrapper.
                    (path (string-append
                           (assoc-ref %build-inputs "mafft") "/bin:"
                           (assoc-ref %build-inputs "exonerate") "/bin:"
                           (assoc-ref %build-inputs "bppsuite") "/bin")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" mandir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
3695
31a9d653
BW
;; Proteinortho installs through its Makefile, which must first be pointed
;; at the output directory; the main script is then wrapped with the
;; build-time PATH.
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "5.15")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
         version "_src.tar.gz"))
       (sha256
        (base32
         "05wacnnbx56avpcwhzlcf6b7s77swcpv3qnwz5sh1z54i51gg2ki"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; No configure script exists; edit the Makefile's install
           ;; location in place instead.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; 'make install' does not create its target directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Prepend the PATH of the build environment to the script's
             ;; run-time PATH.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl"))
                   (search-path (getenv "PATH")))
               (wrap-program script
                 `("PATH" ":" prefix (,search-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
3747
846e3409
RW
;; Pyicoteo is fetched as a Bitbucket tag archive, so an explicit file
;; name is given to the download.  Python 2 only; no test suite.
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the first paragraph must match the number of
    ;; @item entries in the list below (seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
 region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
3785
af860475
BW
;; Prodigal has no configure script and no check target; its Makefile is
;; told where to install through INSTALLDIR.
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/hyattpd/Prodigal/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags
       (let ((bindir (string-append (assoc-ref %outputs "out") "/bin")))
         (list (string-append "INSTALLDIR=" bindir)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
608dd932 3816
ceb62d54
BW
;; Roary is a Perl distribution from CPAN.  It is installed by copying
;; "bin" and "lib" into the output; the tests are run file by file, and
;; every installed program is wrapped with the build-time search paths.
(define-public roary
  (package
    (name "roary")
    (version "3.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0g0pzcv8y7n2w8q7c9q0a7s2ghkwci6w8smg9mjw4agad5cd7yaw"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly, with the source tree's "bin" and "lib" first on
             ;; the search paths.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Run every test file (no early exit) and succeed only when
             ;; the list of failing files is empty.
             (null? (filter (lambda (file)
                              (display file)
                              (display "\n")
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (bin  (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; 'find-files' lists the source tree's "bin" entries as
               ;; "bin/NAME"; the same relative names exist under OUT.
               ;; Names ending in "R" (the R script) are handled below.
               (for-each (lambda (prog)
                           (let ((binary (string-append out "/" prog)))
                             (wrap-program binary
                               `("PERL5LIB" ":" prefix
                                 (,(string-append perl5lib ":" out
                                                  "/lib/perl5/site_perl"))))
                             (wrap-program binary
                               `("PATH" ":" prefix
                                 (,(string-append path ":" out "/bin"))))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets the R library path and
               ;; coreutils instead of the Perl environment.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/"))))
                 (wrap-program file
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r" ,r)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
3931
608dd932
BW
;; RAxML is built from the hybrid (MPI + threads) Makefile.  It has no
;; configure script, no tests and no install target, so the executable is
;; installed by hand together with a short "raxml" alias.
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.9")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1pv8p2fy67y21a9y4cm7xpvxqjwz2v4201flfjshdq1p8j52rqf7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; The link itself must live in the output's "bin".  A bare
               ;; "raxml" would be created relative to the build directory
               ;; and never reach the installed package.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    (license license:gpl2+)))
af860475 3971
66e3eff1
RW
;; RSEM is built against the packaged boost and samtools rather than the
;; bundled copies.  Its programs and Perl module are installed by hand,
;; and the Perl scripts are wrapped so that they find that module.
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                       version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of boost.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script; use the slot to stop the
         ;; Makefile from building the bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix-dir (assoc-ref outputs "out"))
                    (bindir     (string-append prefix-dir "/bin"))
                    (perldir    (string-append prefix-dir
                                               "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               ;; Install every file whose name matches "rsem-.*".
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix-dir (assoc-ref outputs "out"))
                    (perldir    (string-append prefix-dir
                                               "/lib/perl5/site_perl")))
               ;; These programs get PERL5LIB pointing at the module
               ;; installed above.
               (for-each (lambda (prog)
                           (wrap-program (string-append prefix-dir
                                                        "/bin/" prog)
                             `("PERL5LIB" ":" prefix (,perldir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r" ,r)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
4049
8622a072
RW
;; RSeQC bundles pysam and bootstraps through the old "distribute"
;; module; the origin snippet removes both so that the packaged pysam and
;; setuptools are used.
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/rseqc/"
                       "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled pysam sources.
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; Strip the dependency on the outdated "distribute" module.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Claim pysam is already available so the bundled copy is
             ;; not used.
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
4092
ec946638
RW
(define-public seek
  ;; Upstream publishes no release tarballs.  The installation notes at
  ;; http://seek.princeton.edu/installation.jsp name this changeset ID as
  ;; the latest stable release.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (sha256
          (base32
           "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         (let* ((tool-dirs '("SeekMiner"
                             "SeekEvaluator"
                             "SeekPrep"
                             "Distancer"
                             "Data2DB"
                             "PCL2Bin"))
                ;; Run "make ARGS ..." inside each directory under
                ;; "tools/", in order; the result is true only when every
                ;; invocation exits with status zero.
                (make-in-tool-dirs
                 (lambda (args)
                   (every (lambda (dir)
                            (with-directory-excursion
                                (string-append "tools/" dir)
                              (zero? (apply system* "make" args))))
                          tool-dirs))))
           (modify-phases %standard-phases
             (add-before 'configure 'bootstrap
               ;; Generate the build system with the "gen_auto" script.
               (lambda _
                 (zero? (system* "bash" "gen_auto"))))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (make-in-tool-dirs make-flags)))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (make-in-tool-dirs
                  (append make-flags (list "install")))))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
4161
4e10a221
RW
;; Samtools 1.x.  Besides the programs installed by "make install", the
;; static library "libbam.a" and the C headers found at the top of the
;; source tree are installed as well.  The 'samtools-0.1' package
;; inherits and adjusts these arguments.
(define-public samtools
  (package
    (name "samtools")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "0znnnxc467jbf1as2dpskrjhfh8mbll760j6w6rdkwlwbqsp8gbc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags (list "--with-ncurses")
       #:phases
       ;; Resulting order: unpack, patch-tests, ..., install,
       ;; install-library, install-headers.
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash")))
             #t))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
               (install-file "libbam.a" lib)
               #t)))
         (add-after 'install-library 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((include (string-append (assoc-ref outputs "out")
                                           "/include/samtools/")))
               ;; Install every "*.h" file from the top-level directory.
               (for-each (lambda (file)
                           (install-file file include))
                         (scandir "." (lambda (name)
                                        (string-match "\\.h$" name))))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda 4219
0b84a0aa
RW
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and many
  ;; bioinformatics pipelines still rely on it.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Start from the arguments of 'samtools' and adjust them.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:make-flags flags)
            `(cons "LIBCURSES=-lncurses" ,flags))
           ((#:phases phases)
            `(modify-phases ,phases
               (replace 'install
                 ;; Only the "samtools" executable is copied into the
                 ;; output's "bin".
                 (lambda* (#:key outputs #:allow-other-keys)
                   (install-file "samtools"
                                 (string-append (assoc-ref outputs "out")
                                                "/bin"))))
               (delete 'patch-tests)
               (delete 'configure))))))))
0b84a0aa 4250
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source (origin
                ;; There are no release tarballs nor tags.
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/wanpinglee/MOSAIK.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, enter the "src"
           ;; directory so that the build happens there.
           (replace 'configure
             (lambda _ (chdir "src") #t))
           ;; Copy everything from "../bin" (relative to "src") to the
           ;; "bin" directory of the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out")
                                         "/bin")))
                 (mkdir-p bin)
                 (copy-recursively "../bin" bin)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      ;; Point at the repository the source is fetched from rather than at
      ;; the former Google Code project page.
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
4297
282c5087
RW
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/ngs/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "04y1fsmdnb5y86m3gg6f5g9wcscr6r25n7m8mdlcxy0i2q6w6cia"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Work from the "ngs-sdk" subdirectory of the unpacked sources.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install', so it is invoked by hand with
             ;; just the build and installation prefixes.
             (let ((prefix (assoc-ref outputs "out"))
                   (build-prefix (string-append (getcwd) "/build")))
               (zero? (system* "./configure"
                               (string-append "--build-prefix=" build-prefix)
                               (string-append "--prefix=" prefix)))))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
4343
(define-public java-ngs
  (package (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Reuse the arguments of ngs-sdk, prepending a #:modules list and
     ;; redirecting the 'enter-dir phase to the "ngs-java" directory.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases sdk-phases)
        `(modify-phases ,sdk-phases
           (replace 'enter-dir
             (lambda _ (chdir "ngs-java") #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
4361
75dd2424
RW
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.7.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0x1cg1x8vy0yjlkp0snc1533zcjhxqzqsaiwqk598n7vvw37n8lf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Add "-msse2" right after every "CFLAGS =" in the build
               ;; environment makefile.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-ngs-java-prefix="
                                      (assoc-ref inputs "java-ngs"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Install interface libraries.  On i686 the interface libraries
               ;; are installed to "linux/gcc/i386", so we need to use the Linux
               ;; architecture name ("i386") instead of the target system prefix
               ;; ("i686").
               (mkdir (string-append out "/ilib"))
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 (string-append out "/ilib"))
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))))))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
4453
cc6ed477
RW
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install just the "plink" executable into the output's "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
4506
c6a24d6e
RW
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; Refer to the header as "samtools/sam.h".  The dot is
               ;; escaped so that the pattern matches the literal file name
               ;; only, not "sam" + any character + "h".
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           ;; Install every object file to "lib" and every ".hpp" header to
           ;; "include/smithlab-cpp".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (lib (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
4559
56e373ef
RW
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/"
                                  "preseq/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
              (modules '((guix build utils)))
              (snippet
               ;; Remove bundled samtools.
               '(delete-file-recursively "samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; Point the Makefile at the output directory and at the samtools
       ;; and smithlab-cpp inputs.
       #:make-flags
       (let ((out (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
4608
9ded1457
BW
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Append the current directory to PYTHONPATH, then run nose,
             ;; selecting tests with the "!known_failing" attribute
             ;; expression.
             (let ((python-path (getenv "PYTHONPATH")))
               (setenv "PYTHONPATH" (string-append python-path ":."))
               (zero? (system* "nosetests" "--attr" "!known_failing"))))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "http://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
4639
(define-public python2-screed
  ;; Python 2 variant of python-screed, with python2-setuptools prepended to
  ;; the native inputs.
  (let ((base (package-with-python2 (strip-python2-variant python-screed))))
    (package
      (inherit base)
      (native-inputs
       (cons `("python2-setuptools" ,python2-setuptools)
             (package-native-inputs base))))))
4646
51c64999
RW
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/sra-tools/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13paw7bq6y47d2pl0ac5gpgcqp1xsy1g7v1fwysm3hr8lb2dck17"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       ;; VDB_LIBDIR is the "lib64" directory of ncbi-vdb when building for
       ;; x86_64 and its "lib32" directory otherwise.
       #:make-flags
       (list (string-append "VDB_LIBDIR="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            ,(if (string-prefix? "x86_64"
                                                 (or (%current-target-system)
                                                     (%current-system)))
                                 "/lib64"
                                 "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (ncbi-vdb (assoc-ref inputs "ncbi-vdb")))
               ;; The build system expects a directory containing the sources
               ;; and raw build output of ncbi-vdb, including files that are
               ;; not installed.  Since we are building against an installed
               ;; version of ncbi-vdb, the following modifications are needed.
               (substitute* "setup/konfigure.perl"
                 ;; Make the configure script look for the "ilib" directory of
                 ;; "ncbi-vdb" without first checking for the existence of a
                 ;; matching library in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Dynamic linking
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" ncbi-vdb)
                       (string-append "--with-ncbi-vdb-build=" ncbi-vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
4733
d3517eda
RW
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball (assoc-ref %build-inputs "source"))
                (search-path
                 (string-append (assoc-ref %build-inputs "tar") "/bin:"
                                (assoc-ref %build-inputs "bzip2") "/bin")))
           ;; Make "tar" and "bzip2" available, then unpack the tarball.
           (setenv "PATH" search-path)
           (system* "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; "include" goes to the "out" output and "share" to "doc".
           (copy-recursively "include"
                             (string-append (assoc-ref %outputs "out")
                                            "/include"))
           (copy-recursively "share"
                             (string-append (assoc-ref %outputs "doc")
                                            "/share"))))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5 4776
d708b7a9
BW
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       ;; Use the 'pypi-uri' helper, like other PyPI-hosted packages in this
       ;; file, instead of spelling out the download URL by hand.
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32
         "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
    (build-system python-build-system)
    (arguments
     ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Current test in setup.py does not work as of 0.6.1,
         ;; so use nose to run tests instead for now.  See
         ;; https://github.com/fhcrc/seqmagick/issues/55
         (replace 'check (lambda _ (zero? (system* "nosetests")))))))
    (inputs
     ;; biopython-1.66 is required due to
     ;; https://github.com/fhcrc/seqmagick/issues/59
     ;; When that issue is resolved the 'python2-biopython-1.66' package
     ;; should be removed.
     `(("python-biopython" ,python2-biopython-1.66)))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)
       ("python-nose" ,python2-nose)))
    (home-page "http://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
4818
66daf78c
BW
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/seqtk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove extraneous header files, as is done in the seqtk
                  ;; master branch.
                  (for-each delete-file
                            '("ksort.h" "kstring.h" "kvec.h"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.
           (lambda _
             (zero? (system* "./seqtk" "seq"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "seqtk" bindir)))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
4861
5f7e17be
BW
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the "unit_tests" program from the build directory.
         (replace 'check (lambda _ (zero? (system* "./unit_tests"))))
         ;; Install the two executables into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library rather than a build-time tool, so it is listed as
    ;; a regular input.
    ;; NOTE(review): presumably the programs link against it -- confirm.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
4902
bcadaf00
BW
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/biocore/sortmerna/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    (outputs '("out" ;for binaries
               "db")) ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                   (databases
                    (string-append (assoc-ref outputs "db")
                                   "/share/sortmerna/rRNA_databases")))
               ;; The two programs go to the "out" output ...
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               ;; ... and the files found under "rRNA_databases" that match
               ;; ".*fasta" go to the "db" output.
               (for-each (lambda (fasta)
                           (install-file fasta databases))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    (license license:lgpl3)))
4947
ce7155d5
RW
(define-public star
  (package
    (name "star")
    (version "2.5.2a")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0xjlsm4p9flln111hv4xx7xy94c2nl53zvdvbk9winmiradjsdra"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the "Depend.list" target.
             ;; Dots in file names are escaped so that they only match a
             ;; literal ".".
             (substitute* "Makefile"
               (("(Depend\\.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled htslib headers into
             ;; angle-bracket includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("vim" ,vim))) ; for xxd
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
de07c0db 5020
dbf4ed7c
RW
(define-public subread
  (package
    (name "subread")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0gn5zhbvllks0mmdg3qlmsbg91p2mpdc2wixwfqpi85yzfrh8hcy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (let* ((system ,(or (%current-target-system)
                           (%current-system)))
              (common-flags '("-ggdb" "-fomit-frame-pointer"
                              "-ffast-math" "-funroll-loops"
                              "-fmessage-length=0"
                              "-O9" "-Wall" "-DMAKE_FOR_EXON"
                              "-DMAKE_STANDALONE"
                              "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
              (x86-64-flags '("-mmmx" "-msse" "-msse2" "-msse3"))
              (ccflags (if (string-prefix? "x86_64" system)
                           (append common-flags x86-64-flags)
                           common-flags)))
         (list (string-append "CCFLAGS=" (string-join ccflags))
               "-f" "Makefile.Linux"
               "CC=gcc ${CCFLAGS}"))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         ;; Copy everything from "../bin" (relative to "src") to the "bin"
         ;; directory of the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bindir)
               (copy-recursively "../bin" bindir)))))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
5075
d15d981e
RW
;; StringTie, an assembler of RNA-Seq alignments.  The bundled samtools copy
;; is removed from the source and the samtools-0.1 input is used instead.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32 "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       ;; Delete the bundled samtools sources.
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; there is no test suite
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Drop the dependency of the "stringtie" target on the bundled
             ;; libbam.a.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Rewrite the quoted bam/sam/kstring includes to
             ;; <samtools/...> includes.
             (substitute* (list "gclib/GBam.h" "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         ;; Copy the "stringtie" executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "stringtie" bin)
               #t))))))
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (license license:artistic2.0)))
5130
de07c0db
RW
;; VCFtools, a tool set for VCF files.  The make flags point PREFIX and
;; MANDIR at the package output.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32 "10l5c07z9p4i9pr4gl54b2c9h6ndhqlbq1rashg2zcgwkbfrkmvn"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; there is no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    (home-page "https://vcftools.github.io/")
    ;; Both the README and https://vcftools.github.io/license.html declare
    ;; the license to be LGPLv3.
    (license license:lgpl3)))
9c38b540 5166
35aa90a1
RW
;; Infernal, RNA alignment inference; restricted to x86 systems below.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32 "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; needed by the tests
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel
    ;; instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (synopsis "Inference of RNA alignments")
    (description
     "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    (home-page "http://eddylab.org/infernal/")
    (license license:bsd-3)))
5194
7b3df1e5
BW
;; The CRAN package "vegan".  An extra phase reverse-applies an upstream
;; commit so that the tests are present in the unpacked source.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.4-1")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "vegan" version))
              (sha256
               (base32
                "0i0c7rc0nzgbysd1nlxzxd2rvy75qcnw3yc7nggzqjzzj5d7yzsd"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The CRAN tarball is distributed without the tests.  Commit
         ;; `591d0e8ba1deaaf82445474ec6619c0b43db4e63' is the one deleting
         ;; them, so it is reverted here.  Upstream plans to stop deleting
         ;; tests in future; see
         ;; https://github.com/vegandevs/vegan/issues/181.
         (add-after 'unpack 'revert-test-deletion
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((patch-file
                    (assoc-ref inputs "r-vegan-delete-tests-patch")))
               (zero? (system* "patch" "-R" "-p1" "-i" patch-file))))))))
    (native-inputs
     `(("gfortran" ,gfortran)
       ("r-knitr" ,r-knitr)
       ("r-vegan-delete-tests-patch"
        ,(origin
           (method url-fetch)
           (uri (string-append
                 "https://github.com/vegandevs/vegan/commit/"
                 "591d0e8ba1deaaf82445474ec6619c0b43db4e63.patch"))
           (sha256
            (base32
             "0b1bi7y4jjdl3ph721vm9apm51dr2z9piwvhy4355sf2b4kyyj5a"))))))
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ("r-lattice" ,r-lattice)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (license license:gpl2+)))
5245
8c6de588
RW
;; The Bioconductor package "annotate".
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "annotate" version))
       (sha256
        (base32
         "1fd2csq7dcs2gwndgwdx2nwkymz8gsmlnqqzv3p0vjjsvvq5n2a8"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (home-page
     "http://bioconductor.org/packages/annotate")
    (synopsis "Annotation for microarrays")
    ;; Spelling of "environments" corrected in the user-visible text.
    (description "This package provides R environments for the annotation of
microarrays.")
    (license license:artistic2.0)))
5271
07a664cd
RW
;; The Bioconductor package "geneplotter".
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "geneplotter" version))
              (sha256
               (base32
                "1p6yvxi243irhjxwm97hp73abhwampj0myyf8z00ij166674pc7h"))))
    (build-system r-build-system)
    ;; R packages propagated alongside this one.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (home-page "http://bioconductor.org/packages/geneplotter")
    (license license:artistic2.0)))
5296
2301fd3e
RW
;; The Bioconductor package "genefilter"; gfortran is a build-time input.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "1vzgciqd09csqcw9qync8blsv51ylrd86a65iadgyy6j26g01fwd"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("gfortran" ,gfortran)))
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (home-page "http://bioconductor.org/packages/genefilter")
    (license license:artistic2.0)))
5322
ad34f0ac
RW
;; The Bioconductor package "DESeq2".  A custom phase appends "-larmadillo"
;; to the PKG_LIBS assignment in src/Makevars.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq2" version))
       (sha256
        (base32
         "0kq06jy4xg5ii3a9l62f17kirsfx0gsiwq6mhiy985cqzpdn893g"))))
    (properties `((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-against-armadillo
           (lambda _
             (substitute* "src/Makevars"
               (("PKG_LIBS =" prefix)
                (string-append prefix "-larmadillo")))
             ;; Return #t explicitly, like the other custom phases in this
             ;; file, rather than relying on the unspecified return value of
             ;; `substitute*'.
             #t)))))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
5367
e8163773
RW
;; The Bioconductor package "AnnotationForge".
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "02msyb9p3hywrryx00zpjkjl126mrv827i1ah1092s0cplm6xxvf"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (home-page "http://bioconductor.org/packages/AnnotationForge")
    (license license:artistic2.0)))
5397
cd9e7dc7
RW
;; The Bioconductor package "RBGL".
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.50.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "1q14m8w6ih56v680kf3d9wh1qbgp7af33kz3cxafdf1vvzx9km08"))))
    ;; Upstream spells the name in upper case.
    (properties '((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (home-page "http://www.bioconductor.org/packages/RBGL")
    (license license:artistic2.0)))
5418
ad740ff8
RW
;; The Bioconductor package "GSEABase".
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "0l2x7yj7lfb0m2dmsav5ib026dikpgl4crdckrnj776yy08lgxpj"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (home-page "http://bioconductor.org/packages/GSEABase")
    (license license:artistic2.0)))
5445
1a1931f7
RW
;; The Bioconductor package "Category".
(define-public r-category
  (package
    (name "r-category")
    (version "2.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "16ncwz7b4y48k0p3fvbrbmvf7nfz63li9ysgcl8kp9kl4hg7llng"))))
    ;; Upstream capitalizes the name.
    (properties '((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-rsqlite" ,r-rsqlite)))
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (home-page "http://bioconductor.org/packages/Category")
    (license license:artistic2.0)))
5476
89f40c5e
RW
;; The Bioconductor package "GOstats".
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "0g2czm94zhzx92z7y2r4mjfxhwml7bhab2db6820ks8nkw1zvr9n"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)))
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (home-page "http://bioconductor.org/packages/GOstats")
    (license license:artistic2.0)))
5506
cb99d457
RW
;; The Bioconductor package "ShortRead"; zlib is a plain input.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "0mjdlg92x5qw4x2djc4dv5lxwl7ai6ix56nnf86zr07jk8vc7yls"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (home-page "http://bioconductor.org/packages/ShortRead")
    (license license:artistic2.0)))
5548
7f903d73
RW
;; The Bioconductor package "systemPipeR".
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.8.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "0hyi841w8fm2yzpm6lwqi3jz5kc8ny8dy5p29dxynzaw5bpjw56d"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (home-page "https://github.com/tgirke/systemPipeR")
    (license license:artistic2.0)))
5594
684f29bd
RW
;; The Bioconductor package "groHMM".
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "0d91nyhqbi5hv3mgmr2z0g29wg2md26g0hyv5mgapmz20cd9zi4y"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (license license:gpl3+)))
5620
f3cfe451
RW
;; Bioconductor annotation data package "TxDb.Hsapiens.UCSC.hg19.knownGene".
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; which rules out bioconductor-uri.
       (uri (string-append "http://bioconductor.org/packages/"
                           "release/data/annotation/src/contrib"
                           "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     '((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (home-page
     "http://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (license license:artistic2.0)))
5652
a2950fa4
BW
;; VSEARCH.  The source snippet removes the bundled cityhash files and the
;; '-mtune=native' flag; the cityhash input is linked instead.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1r8fk3whkil348y5hfsd4r56qjmchhq4nxm6s7ra5rlisw0mf9fy"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled cityhash and '-mtune=native'.
           (substitute* "src/Makefile.am"
             (("^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash \
-O3 -mtune=native -Wall -Wsign-compare")
              (string-append "AM_CXXFLAGS=-lcityhash"
                             " -O3 -Wall -Wsign-compare"))
             (("^__top_builddir__bin_vsearch_SOURCES = city.h \\\\")
              "__top_builddir__bin_vsearch_SOURCES = \\")
             (("^city.h \\\\") "\\")
             (("^citycrc.h \\\\") "\\")
             (("^libcityhash_a.*") "")
             (("noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a \
libcityhash.a")
              "noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a")
             (("__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
libcpu_sse2.a libcityhash.a")
              "__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
libcpu_sse2.a -lcityhash"))
           ;; Turn the quoted cityhash includes into angle-bracket includes.
           (substitute* "src/vsearch.h"
             (("^\\#include \"city.h\"") "#include <city.h>")
             (("^\\#include \"citycrc.h\"") "#include <citycrc.h>"))
           ;; Delete the bundled cityhash files themselves.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run autoreconf before the configure phase.
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; Building fails on other platforms because vsearch uses non-portable
    ;; SSE intrinsics.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
5722
07837874
RW
;; ParDRe.  There is no configure step, and the install phase is replaced
;; by copying the "ParDRe" executable into the output.
(define-public pardre
  (package
    (name "pardre")
    (version "1.1.5")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0zkyjzv4s8q2h5npalhirbk17r5b1h0n2a42mh7njzlf047h9bhy"))))
    (build-system gnu-build-system)
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no tests are included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "ParDRe" bin)
               #t))))))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
5762
e4a44a6a
BW
;; The "bio-kseq" gem; its tests run through the "spec" target.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    (arguments
     '(#:test-target "spec"))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
5790
9c38b540
PP
;; The "bio-locus" gem.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
edb15985 5814
b2bddb07
PP
;; The "bio-blastxmlparser" gem.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is listed as a native input, matching how the other Ruby
    ;; packages in this file (bio-locus, ruby-bio-kseq) declare it.
    ;; NOTE(review): presumably it is only needed by the test suite; confirm.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    ;; Use HTTPS, like the other GitHub home pages in this file.
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
5838
edb15985
PP
;; The "bio" gem (BioRuby).  A custom phase replaces hard-coded tool names
;; in the test files with the paths returned by `which'.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.0")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "01k2fyjl5fpx4zn8g6gqiqvsg2j1fgixrs9p03vzxckynxdq3wmc"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which))) ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; The clauses are applied in this order, as the separate
             ;; `substitute*' calls on this file were before.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; No comma between the two file names: inside the enclosing
             ;; quasiquote a comma is an unquote, not a separator.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
a5002ae7 5881
9fba89e8
RW
;; The CRAN package "ACSNMineR".
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32 "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (home-page "http://cran.r-project.org/web/packages/ACSNMineR")
    (license license:gpl2+)))
5907
d29b25c4
RW
;; The Bioconductor package "BiocGenerics".
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32 "06szdz7dfs1iyv5zdl4fjzad18nnf1zf3wvglc6c6yd9mrqlf7vk"))))
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "BiocGenerics")))
    (build-system r-build-system)
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (home-page "http://bioconductor.org/packages/BiocGenerics")
    (license license:artistic2.0)))
5927
eb24341f
RJ
;; BiocInstaller, fetched from Bioconductor; it declares no inputs.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32 "0y1y5wmy6lzjqx3hdg15n91d417ccjj8dbvdkhmp99bs5aijwcpn"))))
    (properties '((upstream-name . "BiocInstaller")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/BiocInstaller")
    (synopsis "Install Bioconductor packages")
    (description
     "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (license license:artistic2.0)))
5946
207ce8fb
RJ
;; biocViews, fetched from Bioconductor.  All of its R dependencies are
;; propagated.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32 "07rjk10b91pkriyq297w86199r2d3sfji3ggs9mq2gyalsa8y4b6"))))
    (properties '((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-knitr" ,r-knitr)
       ("r-runit" ,r-runit)))
    (home-page "http://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description
     "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
5974
99df12cd
RJ
;; BiocStyle, fetched from Bioconductor; it declares no inputs.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32 "0qbk23fz8cn260isd9xlh9lxfj4adar6iqzai01c4kz0p31f45za"))))
    (properties '((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description
     "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
5994
4644644a
RJ
;; BiocCheck, fetched from Bioconductor.  An extra phase links the bundled
;; command-line scripts into the output's bin directory.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocCheck" version))
       (sha256
        (base32 "1rfy37xg1nc2cmgbclvzsi7sgmdcdjiahsx9crgx3yaw7kxgiack"))))
    (properties '((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the script name is appended, so the symlink targets
                    ;; do not contain a doubled "//".
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script under the same name in bin.
               (for-each
                (lambda (script)
                  (symlink (string-append script-dir "/" script)
                           (string-append dest-dir "/" script)))
                '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (native-inputs
     `(("which" ,which)))
    (propagated-inputs
     `(("r-graph" ,r-graph)
       ("r-knitr" ,r-knitr)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-devtools" ,r-devtools)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)))
    (home-page "http://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description
     "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
6042
2acaaee5
RJ
;; getopt, fetched from CRAN; it declares no inputs.
(define-public r-getopt
  (package
    (name "r-getopt")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "getopt" version))
              (sha256
               (base32
                "00f57vgnzmg7cz80rjmjz1556xqcmx8nhrlbbhaq4w7gl2ibl87r"))))
    (build-system r-build-system)
    (home-page "https://github.com/trevorld/getopt")
    (synopsis "Command-line option processor for R")
    (description "This package is designed to be used with Rscript to write shebang
scripts that accept short and long options. Many users will prefer to
use the packages @code{optparse} or @code{argparse} which add extra
features like automatically generated help options and usage texts,
support for default values, positional argument support, etc.")
    (license license:gpl2+)))
6064
c79ad57a
RJ
;; optparse, fetched from CRAN; it propagates r-getopt.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.3.2")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "optparse" version))
              (sha256
               (base32
                "1g8as89r91xxi5j5azsd6vrfrhg84mnfx2683j7pacdp8s33radw"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-getopt" ,r-getopt)))
    (home-page "https://github.com/trevorld/optparse")
    (synopsis "Command line option parser")
    (description "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (license license:gpl2+)))
6087
247d498a
RJ
;; DNAcopy, fetched from Bioconductor; gfortran is listed as an input.
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DNAcopy" version))
       (sha256
        (base32 "1idyvfvy7xx8k9vk00y4k3819qmip8iqm809j3vpxabmsn7r9zyh"))))
    (properties '((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description
     "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
6109
7485129e
RW
;; S4Vectors, fetched from Bioconductor; it propagates r-biocgenerics.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "0m0npc0vhmcwcxws7v2f8k4hvvrjvnlrsr94klxf4a8m4xw2xzzk"))))
    (properties '((upstream-name . "S4Vectors")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
6136
274da826
RW
;; seqinr, fetched from CRAN; zlib is a plain input, the R dependencies are
;; propagated.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.3-3")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqinr" version))
              (sha256
               (base32
                "0rk4yba8km26c0rh1f4h474zsb5n6kjmqsi55bnzr6p8pymp18hj"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (home-page "http://seqinr.r-forge.r-project.org/")
    (synopsis "Biological sequences retrieval and analysis")
    (description "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (license license:gpl2+)))
6161
78addcb0
RW
;; IRanges, fetched from Bioconductor.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "0cdl1sfd3cvf93lnz91fdk64fbg1mnd5g958dwh1il8r358hqq3f"))))
    (properties '((upstream-name . "IRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
6190
bf7764b7
RW
;; GenomeInfoDb, fetched from Bioconductor.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "0nhg4bk38gzvf3mvnbqgisbbhfv1kzjld27z1z9knnlkplkiyyyv"))))
    (properties '((upstream-name . "GenomeInfoDb")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
6216
744004a3
RJ
;; edgeR, fetched from Bioconductor; it propagates r-limma and r-locfit.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "edgeR" version))
       (sha256
        (base32 "1r6hhwkqp13m022hjajzr1lnjsbai0yjhykwn0kp1f0la990a808"))))
    (properties '((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)))
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The sentence "It be applied to ..." was missing its verb; it now
    ;; reads "It can be applied to ...".
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
6242
b669d9c4
RJ
;; VariantAnnotation, fetched from Bioconductor.  zlib is a plain input; the
;; R dependencies, including r-zlibbioc, are propagated.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "1lwzfgahz8ipwli73kcfqb18y6adi129hap1gnycnj3980m54i8q"))))
    (properties '((upstream-name . "VariantAnnotation")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
6280
7d4224d7
RJ
;; limma, fetched from Bioconductor; it declares no inputs.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.30.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "04jris7wk2lxksrrvrjsysznsdb2k04lfgrnp18ic49sazva0hfy"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
6299
0e7d5560
RW
;; XVector, fetched from Bioconductor.  The zlibbioc import is stripped from
;; the package metadata and zlib is supplied as an input instead.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "XVector" version))
       (sha256
        (base32 "09lbqxpqr80g0kw77mpz0p1a8cq706j33kz8194wp71il67cdzi7"))))
    (properties '((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove the zlibbioc entry from DESCRIPTION and the
             ;; matching import directive from NAMESPACE.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/XVector")
    ;; Spelling fixed: "manpulation" -> "manipulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
6335
e2cd1d0f
RW
;; GenomicRanges, fetched from Bioconductor.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicRanges" version))
       (sha256
        (base32 "039nxccg9i2an8q2wni79x8dr9p1fcfcqvih9hg9w243pczg2g3c"))))
    (properties '((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The opening sentence used to run two clauses together ("provides
    ;; tools to ... is playing a central role"); it is now one well-formed
    ;; sentence with the same content.
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, which play a central role when it comes to
analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
6364
555e3399
RW
;; Biobase, fetched from Bioconductor; it propagates r-biocgenerics.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "0js9j9wqls8f571ifl9ylllbb9a9hwf7b7drf2grwb1fl31ldazl"))))
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
6386
8b7bce74
RW
;; AnnotationDbi, fetched from Bioconductor.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "0ydrqw1k1j5p6w76bwc753cx545c055x88q87wzya93858synj6r"))))
    (properties '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
6413
c465fa72
RW
;; biomaRt, fetched from Bioconductor.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biomaRt" version))
       (sha256
        (base32 "1x0flcghq71784q2l02j0g4f9jkmyb14f6i307n6c59d6ji7h7x6"))))
    (properties '((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second;
    ;; the two arguments were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
6443
e91d362e
RW
;; BiocParallel, fetched from Bioconductor.  Two licenses are listed.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "123i928rwi4h4sy4fpysv6pinw5nns0sm3myxi2ghqhm34ws8gyl"))))
    (properties '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)))
    (home-page "http://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
6467
bf159353
RW
;; Biostrings, fetched from Bioconductor.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "08z8lkz3axa94wkf144a931ry6vf6cc25avi1ywr84ln2k5czz9f"))))
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
6493
f8d74f70
RW
;; Rsamtools, fetched from Bioconductor.  The zlibbioc import is stripped
;; from the package metadata and zlib is supplied as an input instead.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "0pf4f6brf4bl5zgjrah0f38qslazrs49ayqgyh0xfqgrh63yx4ck"))))
    (properties '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove the zlibbioc entry from DESCRIPTION and the
             ;; matching import directive from NAMESPACE.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
6537
6e76dda2
RW
;; SummarizedExperiment, fetched from Bioconductor.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "1kbj8sg2ik9f8d6g95wz0py62jldg01qy5rsdpg1cxw95nf7dzi3"))))
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
6566
d8a828af
RW
;; GenomicAlignments, fetched from Bioconductor.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "11vb0a0zd36i4yhg4mfijv787v0nihn6pkjj6q7rfy19gwy61xlc"))))
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
6599
317755ff
RW
;; rtracklayer, fetched from Bioconductor.  The zlibbioc import is stripped
;; from the package metadata and zlib is supplied as an input instead.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.34.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "0x59k2fd0iaqi93gy6bm58p2j2z90z1b7a6w5b4c098y98n096rc"))))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove the zlibbioc entry from DESCRIPTION and the
             ;; matching import directive from NAMESPACE.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
6644
2fd7c049
RW
;; GenomicFeatures, fetched from Bioconductor.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "0z8spi2knwzwi10c38vr7xlvi3ah9faj7m1lka880mmxkl9cai4k"))))
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
6685
fb25d880
RW
;; GO.db annotation package.  The source URI is spelled out by hand,
;; pointing at the data/annotation area of the Bioconductor site, rather
;; than built with bioconductor-uri.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioconductor.org/packages/"
             "release/data/annotation/src/contrib/GO.db_"
             version
             ".tar.gz"))
       (sha256
        (base32 "02cj8kqi5w39jwcs8gp1dgj08sah262ppxnkz4h3qd0w191y8yyl"))))
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
6709
d1dbde6a
RW
;; graph, fetched from Bioconductor; it propagates r-biocgenerics.
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "graph" version))
       (sha256
        (base32 "0g3dk5vsdp489fmyg8mifczmzgqrjlakkkr8i96dj15gghp3l135"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
6728
d547ce5e
RW
;; topGO, fetched from Bioconductor.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "0j6sgvam4lk9348ag6pypcbkv93x4fk0di8ivhr23mz2s2yqzwrx"))))
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-matrixstats" ,r-matrixstats)
       ("r-graph" ,r-graph)
       ("r-sparsem" ,r-sparsem)))
    (home-page "http://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
6760
c63cef66
RW
;; Bioconductor "BSgenome": shared infrastructure for genome data packages.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "0hxwc02h5mzhkrk60d1jmlsfjf0ai9jxdc0128kj1sg4r2k1q94y"))))
    (build-system r-build-system)
    ;; Upstream spells the name with capital letters.
    (properties '((upstream-name . "BSgenome")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
6790
c43a011d
RW
;; Bioconductor "impute": imputation of missing microarray data.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "1164zvnikbjd0ybdn9xwn520rlmdjd824vmhnl83zgv3v9lzp9bm"))))
    (build-system r-build-system)
    ;; A Fortran compiler is supplied as a build input.
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "http://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
6810
03ea5a35
RW
;; Bioconductor "seqPattern": visualisation of oligonucleotide patterns.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0lsa5pz36xapi3yiv78k3z286a5md5sm5g21pgfyg8zmhmkxr7y8"))))
    (build-system r-build-system)
    ;; Upstream spells the name with a capital letter.
    (properties '((upstream-name . "seqPattern")))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "http://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
6837
cb933df6
RW
;; Bioconductor "genomation": summary, annotation and visualization of
;; genomic intervals.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1m4mz7wihj8yqivwkzw68div8ybk4rjsai3ffki7xp7sh21ax03y"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
6886
64efa307
RW
;; Bioconductor "genomationData": experimental data sets for use with the
;; genomation package.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/experiment/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "release/data/experiment/src/contrib/"
                                  "genomationData_" version ".tar.gz"))
              (sha256
               (base32
                "16dqwb7wx1igx77zdbcskx5m1hs4g4gp2hl56zzm70hcagnlkz8y"))))
    ;; Record the mixed-case upstream name, as the other packages in this
    ;; file with non-lowercase upstream names do.
    (properties
     `((upstream-name . "genomationData")))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
6914
486da491
RW
;; org.Hs.eg.db: Bioconductor genome-wide annotation package for human.
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/org.Hs.eg.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "19mg64pw8zcvb9yxzzyf7caz1kvdrkfsj1hd84bzq7crrh8kc4y6"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Hs.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://www.bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome wide annotation for Human")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
    (license license:artistic2.0)))
6940
fefedf98
RW
;; org.Ce.eg.db: Bioconductor genome-wide annotation package for the worm.
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/org.Ce.eg.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "12llfzrrc09kj2wzbisdspv38qzkzgpsbn8kv7qkwg746k3pq436"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Ce.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://www.bioconductor.org/packages/org.Ce.eg.db/")
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (license license:artistic2.0)))
6966
16c53a1e
RW
;; org.Dm.eg.db: Bioconductor genome-wide annotation package for the fly.
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/org.Dm.eg.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "1vzbphbrh1cf7xi5cksia9xy9a9l42js2z2qsajvjxvddiphrb7j"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Dm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://www.bioconductor.org/packages/org.Dm.eg.db/")
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (license license:artistic2.0)))
6992
e761beb9
RW
;; org.Mm.eg.db: Bioconductor genome-wide annotation package for the mouse.
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/org.Mm.eg.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "1lykjqjaf01fmgg3cvfcvwd5xjq6zc5vbxnm5r4l32fzvl89q50c"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Mm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://www.bioconductor.org/packages/org.Mm.eg.db/")
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (license license:artistic2.0)))
7018
936e7d67
RW
;; Bioconductor "seqLogo": sequence logos for DNA sequence motifs.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "18bajdl75h3039559d81rgllqqvnq8ygsfxfx081xphxs0v6xggy"))))
    (build-system r-build-system)
    ;; Upstream spells the name with a capital letter.
    (properties '((upstream-name . "seqLogo")))
    (home-page "http://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
7039
c90a4baf
RW
;; BSgenome.Hsapiens.UCSC.hg19: genome sequence data for Homo sapiens (hg19).
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.UCSC.hg19_" version ".tar.gz"))
       (sha256
        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    ;; The package is little more than one very large data file, so
    ;; substitutes are not worth building.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (license license:artistic2.0)))
7070
a3e90287
RW
;; BSgenome.Mmusculus.UCSC.mm9: genome sequence data for Mus musculus (mm9).
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm9_" version ".tar.gz"))
       (sha256
        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    ;; The package is little more than one very large data file, so
    ;; substitutes are not worth building.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (license license:artistic2.0)))
7101
4714d521
RW
;; BSgenome.Mmusculus.UCSC.mm10: genome sequence data for Mus musculus (mm10).
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm10_" version ".tar.gz"))
       (sha256
        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    ;; The package is little more than one very large data file, so
    ;; substitutes are not worth building.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (license license:artistic2.0)))
7133
943bd627
RW
;; BSgenome.Celegans.UCSC.ce6: genome sequence data for C. elegans (ce6).
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce6_" version ".tar.gz"))
       (sha256
        (base32 "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    ;; The package is little more than one very large data file, so
    ;; substitutes are not worth building.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
7165
fc47c7d6
RW
;; BSgenome.Celegans.UCSC.ce10: genome sequence data for C. elegans (ce10).
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce10_" version ".tar.gz"))
       (sha256
        (base32 "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    ;; The package is little more than one very large data file, so
    ;; substitutes are not worth building.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
7197
6dc60998
RW
;; BSgenome.Dmelanogaster.UCSC.dm3: genome sequence data for Drosophila
;; melanogaster (dm3).
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm3_"
                           version ".tar.gz"))
       (sha256
        (base32 "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    ;; The package is little more than one very large data file, so
    ;; substitutes are not worth building.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (license license:artistic2.0)))
7229
ae2462f7
RW
;; Bioconductor "motifRG": discriminative motif discovery in high throughput
;; sequencing data.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32
         "1pa97aj6c5f3gx4bgriw110764dj3m9h104ddi8rv2bpy41yd98d"))))
    (properties `((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ;; Input labels follow the Guix package name (dashes, not the dots of
       ;; the upstream name), matching every other label in this file.
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
7256
a5002ae7
AE
;; R/qtl: analysis of QTL experiments; the source comes from the CRAN mirror.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.39-5")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://cran/src/contrib/"
                                  "qtl_" version ".tar.gz"))
              (sha256
               (base32
                "1grwgvyv7x0dgay1858bg7qf4wk47gpnq7qkqpcda9cn0h970d6f"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description
     "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
d1e32822 7281
9e3ba31c
RJ
;; Bioconductor "zlibbioc": zlib sources packaged for use by R packages.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "0hbk90q5hl0fycfvy5nxxa4hxgglag9lzp7i0fg849bqygg5nbyq"))))
    (build-system r-build-system)
    (properties '((upstream-name . "zlibbioc")))
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description
     "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
7300
52765a63
RW
;; Bioconductor "Rhtslib": the HTSlib C library shipped as an R package.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "1vk3ng61dhi3pbia1lp3gl3mlr3i1vb2lkq83qb53i9dzz128wh9"))))
    (build-system r-build-system)
    ;; Upstream spells the name with a capital letter.
    (properties '((upstream-name . "Rhtslib")))
    (native-inputs
     `(("autoconf" ,autoconf)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
7327
fe02c4c9
RW
;; Bioconductor "bamsignals": read count signals from indexed BAM files.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "1k42gvk5mgq4la1fp0in3an2zfdz69h6522jsqhmk0f6i75kg4mb"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "http://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
7357
89984be4
RW
;; RCAS: R library of the RNA-centric annotation system, fetched from the
;; project's GitHub release archive.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/RCAS/"
                           "archive/v" version ".tar.gz"))
       ;; Store the archive under a name derived from the package name and
       ;; version.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1h7di822ihgkhmmmlfbfz3c2dkjyjxl307i6mx8w0cwjqbna1kp6"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("r-testthat" ,r-testthat)
       ;; knitr checks for "pandoc-citeproc" in the PATH while building
       ;; vignettes.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)))
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-biomart" ,r-biomart)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-topgo" ,r-topgo)
       ("r-dt" ,r-dt)
       ("r-plotly" ,r-plotly)
       ("r-motifrg" ,r-motifrg)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:expat)))
7403
50937297
RW
;; rcas-web: web interface for the RNA-centric annotation system (RCAS).
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/BIMSBbioinfo/rcas-web/releases/"
                    "download/v" version "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0d3my0g8i7js59n184zzzjdki7hgmhpi4rhfvk7i6jsw01ba04qq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; After installation, wrap the "rcas-web" executable so that it
         ;; runs with the Guile load paths of guile-json and guile-redis and
         ;; with the R_LIBS_SITE value seen at build time.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((site-dir "/share/guile/site/2.2")
                    (guile-path
                     (string-join
                      (map (lambda (label)
                             (string-append (assoc-ref inputs label)
                                            site-dir))
                           '("guile-json" "guile-redis"))
                      ":"))
                    (program (string-append (assoc-ref outputs "out")
                                            "/bin/rcas-web")))
               (wrap-program program
                 (list "GUILE_LOAD_PATH" ":" '= (list guile-path))
                 (list "GUILE_LOAD_COMPILED_PATH" ":" '= (list guile-path))
                 (list "R_LIBS_SITE" ":" '=
                       (list (getenv "R_LIBS_SITE")))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r" ,r)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-next)
       ("guile-json" ,guile2.2-json)
       ("guile-redis" ,guile2.2-redis)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description
     "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
7447
7500e42b
RJ
;; Bioconductor "MutationalPatterns": extraction and visualization of
;; mutational patterns in genomic data.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.0.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "1a3c2bm0xx0q4gf98jiw74msmdf2fr8rbsdysd5ww9kqlzmsbr17"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description
     "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
7480
66e40e00
RW
;; EMBOSS: molecular biology analysis suite, built with the GNU build system
;; after regenerating the autotools files.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       ;; The archive sits in a directory named after the major.minor
       ;; version followed by ".0".
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER checks expect libgd, libpng and zlib under one
             ;; common prefix; replace the check with the settings it would
             ;; have produced.
             (let ((png-settings
                    "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
               (substitute* "configure.in"
                 (("CHECK_PNGDRIVER") png-settings)))
             #t))
         (add-after 'unpack 'disable-update-check
           (lambda _
             ;; There is no Internet connection at build time, so the update
             ;; lookup cannot work; drop it from the Makefile template.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; configure.in and Makefile.am are patched above, so the build
         ;; files are regenerated before the configure phase.
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description
     "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
7543
1f1b20b8
RW
;; BITS (Binary Interval Search), built from a pinned Git commit.
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; No release archive has been published yet, although the version
      ;; is 2.13.0.
      (version (string-append "2.13.0-" revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; the sources include no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Edit the Makefile: blank out everything up to and including
           ;; "_cuda", and drop the trailing backslash after
           ;; "bits_test_intersections".
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           ;; Installation just copies the "bin" directory into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" bindir))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description
       "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
7589
(define-public piranha
  ;; There is no release tarball for the latest version. The latest commit is
  ;; older than one year at the time of this writing.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/piranha.git")
                      (commit commit)))
                ;; Give the checkout a descriptive name that includes the
                ;; package name and version, as the other Git-based origins
                ;; in this file do.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file found in the "smithlab-cpp" input into the
           ;; source tree's src/smithlab_cpp/ directory before building.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (for-each (lambda (file)
                           (install-file file "./src/smithlab_cpp/"))
                         (find-files (assoc-ref inputs "smithlab-cpp")))
               #t))
           ;; After the regular install phase, install every file found under
           ;; the build tree's "bin" directory into the output's "bin".
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files "bin" ".*")))
               #t)))
         ;; Point the configure script at the headers and libraries of the
         ;; "bamtools" input.
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools")
                              "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools")
                              "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Source checkout consumed by the 'copy-smithlab-cpp phase above.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
e62ffce5 7655
d1e32822
RW
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Use the shared PyPI helper instead of spelling out the
              ;; download URL by hand; the upstream project name is "PePr".
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         ;; When setuptools is used a ".egg" archive is generated and
         ;; installed. This makes it hard to actually run PePr. This issue
         ;; has been reported upstream:
         ;; https://github.com/shawnzhangyx/PePr/issues/9
         (add-after 'unpack 'disable-egg-generation
           (lambda _
             (substitute* "setup.py"
               (("from setuptools import setup")
                "from distutils.core import setup"))
             #t)))))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    ;; Point at the GitHub project (the same repository as the upstream issue
    ;; referenced above) rather than the retired Google Code page.
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
6b49a37e
RJ
7696
(define-public filevercmp
  ;; The source is fetched as an archive of a specific upstream commit; the
  ;; version string embeds the first seven characters of that commit.
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Install the "filevercmp" executable into the output's
               ;; "bin" directory.
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Return an explicit success value, like the other custom
                 ;; phases in this file, instead of relying on whatever
                 ;; `install-file' happens to return.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
5fb5dffb
RW
7724
(define-public multiqc
  (package
    (name "multiqc")
    (version "0.6")
    ;; The release tarball is downloaded from PyPI.
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32
                "0avw11h63ldpxy5pizc3wl1wa01ha7q10wb240nggsjz3jaqvyiy"))))
    (build-system python-build-system)
    ;; Build-time only dependency.
    (native-inputs
     `(("python-setuptools" ,python-setuptools)))
    ;; Python libraries propagated to users of this package.
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)))
    (home-page "http://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3)))