gnu: Remove python-setuptools and python2-setuptools from inputs (part 2)
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
CommitLineData
4e10a221 1;;; GNU Guix --- Functional package management for GNU
0047d26a 2;;; Copyright © 2014, 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
9b9b7ffd 3;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
8921841d 4;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
a5002ae7 5;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
a0a71439 6;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
ddb83129 7;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
318c0aee 8;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
4e10a221
RW
9;;;
10;;; This file is part of GNU Guix.
11;;;
12;;; GNU Guix is free software; you can redistribute it and/or modify it
13;;; under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 3 of the License, or (at
15;;; your option) any later version.
16;;;
17;;; GNU Guix is distributed in the hope that it will be useful, but
18;;; WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
24
25(define-module (gnu packages bioinformatics)
26 #:use-module ((guix licenses) #:prefix license:)
27 #:use-module (guix packages)
8e913213 28 #:use-module (guix utils)
4e10a221 29 #:use-module (guix download)
2c16316e 30 #:use-module (guix git-download)
ec946638 31 #:use-module (guix hg-download)
10b4a969 32 #:use-module (guix build-system ant)
4e10a221 33 #:use-module (guix build-system gnu)
d7678942 34 #:use-module (guix build-system cmake)
365c8153 35 #:use-module (guix build-system perl)
8622a072 36 #:use-module (guix build-system python)
a5002ae7 37 #:use-module (guix build-system r)
9c38b540 38 #:use-module (guix build-system ruby)
d3517eda 39 #:use-module (guix build-system trivial)
4e10a221 40 #:use-module (gnu packages)
a2950fa4 41 #:use-module (gnu packages autotools)
684bf7c7 42 #:use-module (gnu packages algebra)
d3517eda 43 #:use-module (gnu packages base)
318c0aee 44 #:use-module (gnu packages bash)
a0a71439 45 #:use-module (gnu packages bison)
e4e5a4d8 46 #:use-module (gnu packages boost)
4e10a221 47 #:use-module (gnu packages compression)
82c370de 48 #:use-module (gnu packages cpio)
1baee943 49 #:use-module (gnu packages curl)
99828fa7 50 #:use-module (gnu packages documentation)
d29150b5 51 #:use-module (gnu packages datastructures)
75dd2424 52 #:use-module (gnu packages file)
02f35bb5 53 #:use-module (gnu packages gawk)
2409f37f 54 #:use-module (gnu packages gcc)
66e40e00 55 #:use-module (gnu packages gd)
97b9da68 56 #:use-module (gnu packages gtk)
b16728b0 57 #:use-module (gnu packages glib)
db7a3444 58 #:use-module (gnu packages groff)
50937297 59 #:use-module (gnu packages guile)
89984be4 60 #:use-module (gnu packages haskell)
66e40e00 61 #:use-module (gnu packages image)
97b9da68 62 #:use-module (gnu packages imagemagick)
15a3c3d4 63 #:use-module (gnu packages java)
51c64999 64 #:use-module (gnu packages linux)
ec946638 65 #:use-module (gnu packages logging)
36742f43 66 #:use-module (gnu packages machine-learning)
db7a3444 67 #:use-module (gnu packages man)
c833ab55 68 #:use-module (gnu packages maths)
6c2b26e2 69 #:use-module (gnu packages mpi)
4e10a221 70 #:use-module (gnu packages ncurses)
81f3e0c1 71 #:use-module (gnu packages pcre)
ceb62d54 72 #:use-module (gnu packages parallel)
66e40e00 73 #:use-module (gnu packages pdf)
4e10a221
RW
74 #:use-module (gnu packages perl)
75 #:use-module (gnu packages pkg-config)
bfe3c685 76 #:use-module (gnu packages popt)
e4e5a4d8 77 #:use-module (gnu packages protobuf)
346a829a 78 #:use-module (gnu packages python)
ec946638 79 #:use-module (gnu packages readline)
9c38b540 80 #:use-module (gnu packages ruby)
84be3b99 81 #:use-module (gnu packages serialization)
c833ab55 82 #:use-module (gnu packages statistics)
d7678942 83 #:use-module (gnu packages tbb)
97b9da68 84 #:use-module (gnu packages tex)
db7a3444 85 #:use-module (gnu packages texinfo)
2127cedb 86 #:use-module (gnu packages textutils)
43c565d2 87 #:use-module (gnu packages time)
a2950fa4 88 #:use-module (gnu packages tls)
ce7155d5 89 #:use-module (gnu packages vim)
365c8153 90 #:use-module (gnu packages web)
c833ab55 91 #:use-module (gnu packages xml)
66e40e00 92 #:use-module (gnu packages xorg)
f7283db3
RW
93 #:use-module (gnu packages zip)
94 #:use-module (srfi srfi-1))
4e10a221 95
8dc797fa
BW
;; ARAGORN ships as a single C file plus a man page; there is no build
;; system upstream, so the 'build' and 'install' phases are written by hand.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.37")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "1c7zxk0h8wsjbix82fmmfyywcq6zn3q9h5y67kcl5y3qal2bv2xr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The source file name embeds the version number.
             (zero? (system* "gcc"
                             "-O3"
                             "-ffast-math"
                             "-finline-functions"
                             "-o"
                             "aragorn"
                             (string-append "aragorn" ,version ".c")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; 'install-file' creates the target directory as needed.
               (install-file "aragorn" bin)
               (install-file "aragorn.1" man))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure.  It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
143
a12ba6e8
BW
;; BamM: Python 2 package wrapping a C library.  The bundled htslib is
;; removed in a source snippet and the packaged one is used instead; tests
;; are run after installation.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method url-fetch)
              ;; BamM is not available on pypi.
              (uri (string-append
                    "https://github.com/Ecogenomics/BamM/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (substitute* "configure" (("/bin/sh") sh))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               ;; Record the build-time PATH in the wrapper of 'bamm'.
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (python (assoc-ref inputs "python"))
                    ;; Take everything after the last dash of the store file
                    ;; name as the version and keep only "MAJOR.MINOR".
                    ;; Slicing a fixed number of characters breaks as soon
                    ;; as the patch level has two digits (e.g. "2.7.12").
                    ;; NOTE(review): assumes the "python" input's file name
                    ;; ends in "-MAJOR.MINOR[.PATCH]" -- confirm.
                    (version (substring python
                                        (+ 1 (string-rindex python #\-))))
                    (parts   (string-split version #\.))
                    (major+minor (string-append (car parts) "."
                                                (cadr parts))))
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               (setenv "PYTHONPATH"
                       (string-append out
                                      "/lib/python" major+minor
                                      "/site-packages:"
                                      (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (zero? (system* "nosetests"))))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib)
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data.  For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
235
9794180d
RW
;; BamTools, built with CMake.  LDFLAGS is set before configuring so that
;; the linker records $out/lib/bamtools as a run-time search path.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' has no useful return value; report success
             ;; explicitly like the other phases in this file.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
268
bdc7be59
MB
;; BCFtools, built against the packaged htslib: the bundled copy is deleted
;; in a snippet and the Makefile is pointed at the "htslib" input through
;; make flags.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/bcftools/releases/download/"
                    version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "095ry68vmz9q5s1scjsa698dhgyvgw5aicz24c19iwfbai07mhqj"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete bundled htslib.
               '(delete-file-recursively "htslib-1.3.1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       ;; Look up the htslib input once; every HTS-related flag is a path
       ;; below it.
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list
          "USE_GPL=1"
          (string-append "prefix=" (assoc-ref %outputs "out"))
          (string-append "HTSDIR=" htslib "/include")
          (string-append "HTSLIB=" htslib "/lib/libhts.a")
          (string-append "BGZIP=" htslib "/bin/bgzip")
          (string-append "TABIX=" htslib "/bin/tabix")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-Makefile
           (lambda _
             (substitute* "Makefile"
               ;; Do not attempt to build htslib.
               (("^include \\$\\(HTSDIR\\)/htslib\\.mk") "")
               ;; Link against GSL cblas.
               (("-lcblas") "-lgslcblas"))
             #t))
         (delete 'configure)
         (add-before 'check 'patch-tests
           (lambda _
             ;; Replace the hard-coded /bin/bash in the test driver.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF.  All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
326
8dd4ff11
RW
;; BEDOPS.  The in-tree tarballs of jansson, zlib and bzip2 are unpacked
;; right after the main 'unpack' phase and the Makefile is told not to
;; unpack them again.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; phase fails if any of the archives cannot be extracted.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t))))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets.  It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale.  Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
384
81de5647
RW
;; bedtools has no configure script and no usable 'install' target here:
;; the install phase copies everything found under "bin" into $out/bin.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.26.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (inputs `(("samtools" ,samtools)
              ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               ;; Install every file of the build tree's "bin" directory.
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files "bin" ".*")))
             #t)))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks.  The most widely-used tools enable
genome arithmetic: that is, set theory on the genome.  For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
423
9a8f309c
RW
;; Later releases of bedtools produce files with more columns than
;; what Ribotaper expects.
;; Everything except the version and the source is inherited from 'bedtools'.
(define-public bedtools-2.18
  (package (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/arq5x/bedtools2/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
438
17dc32a4
RW
;; RiboTaper.  Note that it depends on the pinned 'bedtools-2.18' and on
;; 'samtools-0.1' rather than on the current versions of those packages.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                                  "files/RiboTaper/RiboTaper_Version_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r" ,r)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data.  This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
468
a0a71439
RJ
;; bioawk.  There is no configure script and the install phase is written
;; by hand: the upstream man page "awk.1" is installed as "bioawk.1".
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (mkdir-p man)
               ;; The man page is renamed, so 'install-file' cannot be used.
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin)
               ;; Report success explicitly, as the other phases in this
               ;; file do.
               #t))))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names.  It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter.  When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
509
a2fb1492
RW
;; pybedtools for Python 2.  bedtools and samtools are propagated inputs.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Build the PyPI URL with 'pypi-uri', like the other PyPI
              ;; packages in this file.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
541
9e12eba8
BW
;; Python 3 variant of biom-format.  The 'python2-variant' property points
;; at the hand-written 'python2-biom-format' package.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.5")
    (source
     (origin
       (method url-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1n25w3p1rixbpac8iysmzcja6m4ip5r6sz19l8y6wlwi49hxn278"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    (properties `((python2-variant . ,(delay python2-biom-format))))))
573
;; Python 2 variant of biom-format: adds a phase that drops the "pyqi"
;; requirement from setup.py and adds python2-setuptools as a native input.
(define-public python2-biom-format
  (let ((base (package-with-python2 (strip-python2-variant python-biom-format))))
    (package
      (inherit base)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ;; Keep whatever arguments the Python 2 base package carries.
         ,@(package-arguments base)))
      (native-inputs `(("python2-setuptools" ,python2-setuptools)
                       ,@(package-native-inputs base))))))
590
f7283db3
RW
;; BioPerl.  The names of all transitive target inputs are computed on the
;; host side and spliced into the build code, where they are used to build
;; the PERL5LIB value for the wrapped scripts.
(define-public bioperl-minimal
  (let* ((inputs `(("perl-module-build" ,perl-module-build)
                   ("perl-data-stag" ,perl-data-stag)
                   ("perl-libwww" ,perl-libwww)
                   ("perl-uri" ,perl-uri)))
         ;; Package names of the transitive target inputs of INPUTS,
         ;; with duplicates removed.
         (transitive-inputs
          (map (compose package-name cadr)
               (delete-duplicates
                (concatenate
                 (map (compose package-transitive-target-inputs cadr) inputs))))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; Release tags use dashes where the version uses dots.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (c)
                                           (if (char=? c #\.)
                                               #\- c)) version)
                             ".tar.gz"))
         (sha256
          (base32
           "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after
            'install 'wrap-programs
            (lambda* (#:key outputs #:allow-other-keys)
              ;; Make sure all executables in "bin" find the required Perl
              ;; modules at runtime.  As the PERL5LIB variable contains also
              ;; the paths of native inputs, we pick the transitive target
              ;; inputs from %build-inputs.
              (let* ((out  (assoc-ref outputs "out"))
                     (bin  (string-append out "/bin/"))
                     (path (string-join
                            (cons (string-append out "/lib/perl5/site_perl")
                                  (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',transitive-inputs))
                            ":")))
                ;; Only the "*.pl" scripts below $out/bin are wrapped.
                (for-each (lambda (file)
                            (wrap-program file
                              `("PERL5LIB" ":" prefix (,path))))
                          (find-files bin "\\.pl$"))
                #t))))))
      (inputs inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology.  Examples include Sequence objects, Alignment objects
and database searching objects.  These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on.  This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
655
85c37e29
RW
;; Python 3 variant of Biopython.  The 'python2-variant' property points at
;; the hand-written 'python2-biopython' package.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.68")
    (source (origin
              (method url-fetch)
              ;; use PyPi rather than biopython.org to ease updating
              (uri (pypi-uri "biopython" version))
              (sha256
               (base32
                "07qc7nz0k77y8hf8s18rscvibvm91zw0kkq7ylrhisf8vp8hkp6i"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-home
           ;; Some tests require a home directory to be set.
           (lambda _ (setenv "HOME" "/tmp") #t)))))
    (inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))
    (properties `((python2-variant . ,(delay python2-biopython))))))
85c37e29
RW
687
;; Python 2 variant of Biopython: the Python 2 transformation of
;; 'python-biopython' plus python2-setuptools as a native input.
(define-public python2-biopython
  (let ((base (package-with-python2 (strip-python2-variant python-biopython))))
    (package
      (inherit base)
      (native-inputs `(("python2-setuptools" ,python2-setuptools)
                       ,@(package-native-inputs base))))))
85c37e29 694
4b1a1528
BW
;; An outdated version of biopython is required for seqmagick, see
;; https://github.com/fhcrc/seqmagick/issues/59
;; When that issue has been resolved this package should be removed.
;; Not exported (plain 'define'); only the version and source differ from
;; 'python2-biopython'.
(define python2-biopython-1.66
  (package
    (inherit python2-biopython)
    (version "1.66")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "biopython" version))
              (sha256
               (base32
                "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
708
985d8411
BW
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-core")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (inputs
       ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of
       ;; the bpp packages with GCC 5.
       `(("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
741
8b5f4d57
BW
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
    (package
      (name "bpp-phyl")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ;; GCC 4.8 fails due to an 'internal compiler error', so we use a more
         ;; modern GCC.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
778
159a7016
BW
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
    (package
      (name "bpp-popgen")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f)) ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  This
library provides population genetics-related modules.")
      (license license:cecill-c))))
811
70f1bc05
BW
(define-public bpp-seq
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
    (package
      (name "bpp-seq")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-seq")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("gcc" ,gcc-5))) ; Use GCC 5 as per 'bpp-core'.
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  This
library provides sequence-related modules.")
      (license license:cecill-c))))
845
db7a3444
BW
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      ;; Evaluates to "2.2.0-1.c516147".
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Every input needs its own label: the label is the key used to look
       ;; an input up, so a duplicated label hides the later entry.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
884
82c370de
RW
(define-public blast+
  (package
    (name "blast+")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled bzip2 and zlib sources along with the useless
           ;; msbuild directory.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/build-system/project_tree_builder/msbuild"))
           ;; The compress Makefile must no longer list the removed
           ;; directories.
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; This massive library comes with only three(!) tests, and every one
       ;; of them fails with "unparsable timing stats".
       ;; ERR [127] -- [util/regexp] test_pcre.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "c++") #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the text that replaces a hardcoded reference to TOOL,
             ;; or #f (after printing a warning) when TOOL cannot be found
             ;; with 'which'.
             (define (tool-replacement tool)
               (if (string=? tool "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which tool)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 tool)
                         #f))))

             ;; Files in which hardcoded tool locations are rewritten.
             (define files-with-hardcoded-paths
               (append '("src/build-system/configure.ac"
                         "src/build-system/configure"
                         "scripts/common/impl/if_diff.sh"
                         "scripts/common/impl/run_with_lock.sh"
                         "src/build-system/Makefile.configurables.real"
                         "src/build-system/Makefile.in.top"
                         "src/build-system/Makefile.meta.gmake=no"
                         "src/build-system/Makefile.meta.in"
                         "src/build-system/Makefile.meta_l"
                         "src/build-system/Makefile.meta_p"
                         "src/build-system/Makefile.meta_r"
                         "src/build-system/Makefile.mk.in"
                         "src/build-system/Makefile.requirements"
                         "src/build-system/Makefile.rules_with_autodep.in")
                       (find-files "scripts/common/check" "\\.sh$")))

             ;; Rewrite "/bin/TOOL" and "/usr/bin/TOOL"; a reference whose
             ;; tool cannot be resolved is left exactly as it was.
             (substitute* files-with-hardcoded-paths
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole directory tool)
                (or (tool-replacement tool) whole)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; The check script refers to "/var/tmp"; use "/tmp" instead.
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; Keep the helper scripts from resetting PATH.
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _ (setenv "HOME" "/tmp") #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix     (assoc-ref outputs "out"))
                    (libdir     (string-append (assoc-ref outputs "lib")
                                               "/lib"))
                    (includedir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" prefix)
                           (string-append "--libdir=" libdir)
                           (string-append "--includedir=" includedir)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           ;; Each library is built twice by default, once
                           ;; with "-static" in its name, and again
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', hence this hand-written call.
               (zero? (apply system* "./configure.orig" flags))))))))
    (outputs '("out"                    ;  19 MB
               "lib"                    ; 203 MB
               "include"))              ;  32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    (home-page "http://blast.ncbi.nlm.nih.gov")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;; * Expat:
    ;;   * ./c++/include/util/bitset/
    ;;   * ./c++/src/html/ncbi_menu*.js
    ;; * Boost license:
    ;;   * ./c++/include/util/impl/floating_point_comparison.hpp
    ;; * LGPL 2+:
    ;;   * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;; * ASL 2.0:
    ;;   * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1031
6c2b26e2
RW
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The pigz sources are deleted by the source snippet, so the
         ;; Makefile must not try to build them.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hardcode the locations of the helper programs: kmc is installed
         ;; into this package's own output, pigz comes from the inputs.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Install the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less memory
compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1116
2c7ee167
RW
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.9")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vp5db8i7is57iwjybcdg18f5ivyzlj5g1ix1nlvxainzivhz55g"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; The 'configure' phase is skipped entirely.
         (delete 'configure)
         ;; Run the Perl test driver against the freshly built executables.
         (replace 'check
           (lambda _
             ;; 'system*' returns the exit status as an integer, and every
             ;; integer counts as true in Scheme.  Compare it against zero so
             ;; that a failing test suite actually fails the phase.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1170
94ce537e
RW
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32
         "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
       (patches (search-patches "tophat-build-with-later-seqan.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copies of SeqAn and samtools.
           (for-each delete-file-recursively
                     '("src/SeqAn-1.3"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda _
             ;; Strip every mention of the bundled samtools from the
             ;; Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Refer to the samtools executable found in PATH.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the samtools headers from their "samtools/"
             ;; subdirectory, for both quoted and angle-bracket includes.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1237
9a8336d8
RW
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the executable, the README and the manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               ;; 'install-file' creates the target directory as needed, so
               ;; no separate 'mkdir-p' calls are required.
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               ;; Signal success explicitly rather than relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1287
d29150b5
RW
(define-public bwa-pssm
  ;; Inherits from 'bwa'; every field not overridden below (for instance the
  ;; build arguments) comes from that package.
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1316
ad641d53
RW
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; "setup.py" bootstraps the outdated "distribute" module; remove
        ;; both the import and the call.
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; The test data are not included, so the tests fail.
       #:tests? #f))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (license license:expat)))
1350
55a9a8c2
RW
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.9.1.4")
    (source (origin
              (method url-fetch)
              ;; Test data is missing on PyPi.
              (uri (string-append
                    "https://github.com/pysam-developers/pysam/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0y41ssbg6nvn2jgcbnrvkzblpjcwszaiv1rgyd8dwzjkrbfsgsmc"))
              (modules '((guix build utils)))
              (snippet
               ;; Drop bundled htslib. TODO: Also remove samtools and bcftools.
               '(delete-file-recursively "htslib"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Build against the htslib input instead of a bundled copy.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "HTSLIB_MODE" "external")
             (setenv "HTSLIB_LIBRARY_DIR"
                     (string-append (assoc-ref inputs "htslib") "/lib"))
             (setenv "HTSLIB_INCLUDE_DIR"
                     (string-append (assoc-ref inputs "htslib") "/include"))
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             #t))
         ;; Run the tests after installation, against the installed module.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python (assoc-ref inputs "python"))
                    ;; Version part of the "python" input's file name, that
                    ;; is, the text after its last hyphen (e.g. "2.7.12").
                    (full-version
                     (substring python
                                (+ 1 (string-rindex python #\-))))
                    ;; Keep the first two components ("2.7").  Slicing a
                    ;; fixed number of characters off the end gives the
                    ;; wrong answer when the patch level has two digits.
                    (major+minor
                     (string-join
                      (list-head (string-split full-version #\.) 2)
                      ".")))
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (assoc-ref outputs "out")
                        "/lib/python" major+minor
                        "/site-packages")))
             ;; Step out of source dir so python does not import from CWD.
             (chdir "tests")
             (setenv "HOME" "/tmp")
             (and (zero? (system* "make" "-C" "pysam_data"))
                  (zero? (system* "make" "-C" "cbcf_data"))
                  (zero? (system* "nosetests" "-v"))))))))
    (propagated-inputs
     `(("htslib" ,htslib)))            ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; Dependencies below are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))

;; Variant of 'python-pysam' produced by 'package-with-python2'.
(define-public python2-pysam
  (package-with-python2 python-pysam))
1421
4db9433a
RW
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "twobitreader" version))
       (sha256
        (base32
         "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
    (build-system python-build-system)
    ;; 'python2-twobitreader' is a customized variant, so it is registered
    ;; here (lazily, because it is defined after this package).
    (properties `((python2-variant . ,(delay python2-twobitreader))))
    (arguments
     ;; The PyPi release does not contain the tests.
     ;; TODO Try building from the Git repo or asking the upstream maintainer
     ;; to distribute the tests on PyPi.
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (home-page "https://github.com/benjschiller/twobitreader")
    (license license:artistic2.0)))

(define-public python2-twobitreader
  ;; Python 2 variant: the generic conversion of 'python-twobitreader' with
  ;; python2-setuptools added in front of its native inputs.
  (let* ((python3-package (strip-python2-variant python-twobitreader))
         (base (package-with-python2 python3-package)))
    (package
      (inherit base)
      (native-inputs
       (cons `("python2-setuptools" ,python2-setuptools)
             (package-native-inputs base))))))
1454
f94bf198
RW
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32
         "1sqkz5d3b9kf688mp7k771c87ins42j7j0whmkb49cb3fsg8s8lj"))))
    (build-system python-build-system)
    ;; 'python2-plastid' is a customized variant, so it is registered here
    ;; (lazily, because it is defined after this package).
    (properties `((python2-variant . ,(delay python2-plastid))))
    (arguments
     ;; Some test files are not included.
     '(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (home-page "https://github.com/joshuagryphon/plastid")
    (license license:bsd-3)))

(define-public python2-plastid
  ;; Python 2 variant: the generic conversion of 'python-plastid' with
  ;; python2-setuptools added in front of its propagated inputs.
  (let* ((python3-package (strip-python2-variant python-plastid))
         (base (package-with-python2 python3-package)))
    (package
      (inherit base)
      ;; setuptools is required at runtime
      (propagated-inputs
       (cons `("python2-setuptools" ,python2-setuptools)
             (package-propagated-inputs base))))))
f94bf198 1496
6c1305f9
RW
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version "-2016-0304.tar.gz"))
       (sha256
        (base32
         "15db0hq38yyifwqx9b6l34z14jcq576dmjavhj8a426c18lvnhp3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; The Makefile copies the executables straight into PREFIX, so
       ;; PREFIX has to be the "bin" directory itself.
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Strip the build date and time from the program's output; they
         ;; are sources of non-determinism.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             #t))
         ;; The "install" target does not create the target directory
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin))
             #t)))))
    (inputs
     `(("perl" ,perl)))
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    (home-page "http://weizhongli-lab.org/cd-hit/")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
1544
810cff85
RW
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the shared objects and the "dist/" directory shipped
           ;; with the sources.
           (delete-file "clipper/src/peaks.so")
           (delete-file "clipper/src/readsToWiggle.so")
           (delete-file-recursively "dist/")
           ;; remove unnecessary setup dependency
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           #t))))
    (build-system python-build-system)
    (arguments
     ;; only Python 2 is supported
     `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (home-page "https://github.com/YeoLab/clipper")
    (license license:gpl2)))
1589
6a35566d
RS
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the documentation, the "QuarryFiles" data directory and
         ;; the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Signal success explicitly rather than relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    ;; QUARRY_PATH collects the "QuarryFiles" directories; the install phase
    ;; above places that directory at the top of the output.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
1629
36742f43
RW
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Nothing is configured or compiled; the files are only copied.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (launcher (string-append out "/bin/couger")))
               (copy-recursively "src" (string-append out "/src"))
               (mkdir (string-append out "/bin"))
               ;; Have the script append the output directory to Python's
               ;; module lookup path before it imports anything else.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (copy-file "couger" launcher))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Wrap 'couger' so that it runs with the PYTHONPATH that is in
             ;; effect during the build.
             (let ((launcher (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (home-page "http://couger.oit.duke.edu")
    (license license:gpl3+)))
1699
bfe3c685
RW
;; Clustal-Omega: built with the stock GNU build system phases; its only
;; declared input is argtable.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs
     (list (list "argtable" argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
1723
191c7101
RW
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
              ;; This patch has been sent upstream already and is available
              ;; for download from Sourceforge, but it has not been merged.
              (patches (search-patches "crossmap-allow-system-pysam.patch"))
              (modules '((guix build utils)))
              ;; Remove the bundled copy of pysam; the packaged pysam from
              ;; the inputs is used instead.
              (snippet
               '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set CROSSMAP_USE_SYSTEM_PYSAM right after unpacking so that it
         ;; is in the environment of all later phases.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1764
3a40a92c
RW
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (let ((samtools (assoc-ref %build-inputs "samtools")))
         (list (string-append "--with-bam=" samtools)))
       #:make-flags
       (let ((eigen (assoc-ref %build-inputs "eigen"))
             (boost-libraries '("-lboost_system"
                                "-lboost_serialization"
                                "-lboost_thread")))
         (list
          ;; The eigen headers live in a subdirectory of "include".
          (string-append "EIGEN_CPPFLAGS="
                         "-I" eigen "/include/eigen3/")
          ;; Several boost libraries have to be linked explicitly.
          (string-append "LDFLAGS="
                         (string-join boost-libraries))))
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the "bam/..." include paths to "samtools/...".
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             (substitute* '("ax_bam.m4"
                            "configure"
                            "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (license license:boost1.0)))
1822
8e913213
RW
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests must be run after the 'install phase, so drop the
         ;; standard 'check phase and add a replacement after 'install.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make the freshly installed modules visible to nosetests.
             ;; NOTE(review): the "X.Y" Python version is cut out of the
             ;; last five characters of the "python" input's store file
             ;; name, which only works for versions of the form "X.Y.Z"
             ;; with single-digit components.
             (setenv "PYTHONPATH"
                     (string-append
                      (getenv "PYTHONPATH")
                      ":" (assoc-ref outputs "out")
                      "/lib/python"
                      (string-take (string-take-right
                                    (assoc-ref inputs "python") 5) 3)
                      "/site-packages"))
             (zero? (system* "nosetests" "-P" "tests")))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1861
1baee943
RW
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     ;; The 'configure phase is dropped; the compiler and the installation
     ;; prefix are handed to "make" directly.
     `(#:make-flags
       (let ((prefix (assoc-ref %outputs "out")))
         (list "CC=gcc"
               (string-append "prefix=" prefix)))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Strip the remote-file test from the Makefile before the tests
         ;; are run.
         (add-before 'check 'disable-curl-test
           (lambda _
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
         ;; there has not yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (subdirectory)
                           (mkdir-p (string-append prefix subdirectory)))
                         '("/lib" "/include"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
1907
69e0e03c
RW
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled libBigWig sources.
           (delete-file-recursively "libBigWig")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Add "BigWig" to the front of the library list in setup.py, since
         ;; the bundled libBigWig sources are removed by the snippet.
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
1942
;; Python 2 variant of python-pybigwig, with python2-setuptools as its only
;; native input.
(define-public python2-pybigwig
  (package
    (inherit (package-with-python2 python-pybigwig))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))))
1948
ec2a67de
BW
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.1.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "DendroPy" version))
              (sha256
               (base32
                "1jfz7gp18wph311w1yygbvjanb3n5mdqal439bb6myw41dwb5m63"))
              ;; Two test failures are known and will be fixed in the
              ;; release following 4.1.0; see
              ;; https://github.com/jeetsukumaran/DendroPy/issues/48
              (patches
               (search-patches
                "python-dendropy-exclude-failing-tests.patch"))))
    (build-system python-build-system)
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)
    ;; The Python 2 variant is defined separately as python2-dendropy.
    (properties `((python2-variant . ,(delay python2-dendropy))))))
1974
(define-public python2-dendropy
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-dendropy))))
    (package
      (inherit py2-base)
      ;; The source of 'python-dendropy' is not reused here: the tests it
      ;; patches out do not fail on Python 2, so the unpatched tarball is
      ;; fetched instead.
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "DendroPy" (package-version py2-base)))
                (sha256
                 (base32
                  "1jfz7gp18wph311w1yygbvjanb3n5mdqal439bb6myw41dwb5m63"))))
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; Run the tests through nosetests: there is currently a test
           ;; failure that only happens on some systems, and only when
           ;; using "setup.py test".
           (replace 'check
             (lambda _
               (zero? (system* "nosetests")))))))
      (native-inputs
       (cons (list "python2-nose" python2-nose)
             (package-native-inputs py2-base))))))
1998
1999
1921b1de
RW
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1nmfin0zjdby3vay3r4flvz94dr6qjhj41ax4yz3vx13j6wz8izd"))))
    (build-system python-build-system)
    ;; The package is built with Python 2, so all Python inputs below are
    ;; the python2-* variants.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-numpydoc" ,python2-numpydoc)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)
       ("python-pybigwig" ,python2-pybigwig)))
    ;; Both native inputs are needed for the tests only.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-pytz" ,python2-pytz)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2038
684bf7c7
BW
(define-public diamond
  (package
    (name "diamond")
    (version "0.8.26")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17s38pgjih6yc2z81040pircbv60c9wr33mbcyki2rfpv8rbxlh0"))))
    (build-system cmake-build-system)
    (arguments
     ;; There is no "check" target, hence no tests.
     '(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Remove "-march=native" from the compiler flags in
         ;; CMakeLists.txt.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    ;; Building diamond fails on other platforms; see
    ;; https://github.com/bbuchfink/diamond/issues/18
    (supported-systems '("x86_64-linux"))
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
2076
97b9da68
RW
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/maaskola/discrover/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    ;; The package does not come with any tests.
    (arguments
     `(#:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description
     "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
2103
6619f9c7
RW
(define-public eigensoft
  ;; The package is built from a specific Git commit; the version string
  ;; combines "6.1.2-" with the revision and an abbreviated commit hash.
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-"
                              revision "."
                              (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/DReichLab/EIG.git")
                      (commit commit)))
                (file-name (string-append "eigensoft-" commit "-checkout"))
                (sha256
                 (base32
                  "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
                (modules '((guix build utils)))
                ;; Delete the pre-built binaries, but keep an empty "bin"
                ;; directory.
                (snippet
                 '(begin
                    (delete-file-recursively "bin")
                    (mkdir "bin")
                    #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags '("CC=gcc")
         #:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           ;; Nothing needs configuring; this phase only enters the
           ;; sub-directory holding the Makefile and patches it.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; Complete the link flags.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The upstream install target merely copies the executables
           ;; to the "bin" directory in the build root, so copy them from
           ;; there to the output.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (mkdir-p target)
                 (for-each (lambda (executable)
                             (install-file executable target))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description
       "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The eigensoft tools themselves are under the Expat license, but
      ;; because they link with the GNU Scientific Library (GSL) the
      ;; effective license is the GPL.
      (license license:gpl3+))))
2172
365c8153
RW
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source
     (origin
       (method url-fetch)
       ;; The download location is keyed by a date; the version number
       ;; does not appear in it.
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                           "versions/2016-05-03/edirect.tar.gz"))
       (sha256
        (base32
         "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Installation amounts to copying the script into "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (copy-file "edirect.pl"
                          (string-append bindir "/edirect.pl"))
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Record the build-time PERL5LIB in a wrapper so that
             ;; 'edirect.pl' finds all perl inputs at runtime.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl"))
                   (perl5lib (getenv "PERL5LIB")))
               (wrap-program script
                 `("PERL5LIB" ":" prefix (,perl5lib)))))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2238
b16728b0
BW
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     ;; Building in parallel fails on some machines.
     `(#:parallel-build? #f))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description used to read "using a many alignment models"; the
    ;; stray article has been dropped.
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2268
e4e5a4d8
RW
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri
               (string-append
                "http://bio.math.berkeley.edu/eXpress/downloads/express-"
                version "/express-" version "-src.tgz"))
              (sha256
               (base32
                "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Turn off static boost libraries and point the build at the
         ;; "bamtools" input instead of a "bamtools" directory inside the
         ;; source tree.
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2313
f3674b1c
BW
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Build and test from within the "source" sub-directory, then
         ;; return to the top-level directory for installation.
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         ;; Copy the conversion script and the executable into "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (copy-file "scripts/convertToEBD.py"
                          (string-append bindir "/convertToEBD.py"))
               (copy-file "bin/ExpressBetaDiversity"
                          (string-append bindir "/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2358
12b04cbe
BW
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.9")
    ;; The source is a single C file rather than an archive.
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32
         "0ljvvw8i1als1wbfzvrf15c3ii2vw9db20a259g6pzg34xyyb97k"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         ;; Compile the source file twice: once as "FastTree" and once
         ;; with OpenMP enabled as "FastTreeMP".
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             (let ((cc (lambda (target . leading-flags)
                         (zero? (apply system* "gcc"
                                       (append leading-flags
                                               '("-O3"
                                                 "-finline-functions"
                                                 "-funroll-loops"
                                                 "-Wall")
                                               (list "-o" target
                                                     source "-lm")))))))
               (and (cc "FastTree")
                    (cc "FastTreeMP" "-DOPENMP" "-fopenmp")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (executable)
                           (copy-file executable
                                      (string-append bindir "/"
                                                     executable)))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2417
2127cedb
RW
;; FASTX-Toolkit: built with the stock GNU build system phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "pkg-config" pkg-config)))
    (inputs
     (list (list "libgtextutils" libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
2449
d7678942
RW
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     ;; FLEXBAR_BINARY_DIR is pointed at the output's "bin" directory and
     ;; the separate 'install phase is deleted.
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the output's "bin" directory first on PATH for the
             ;; validation script.
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The name of the unpacked source directory contains the
             ;; version; derive it from the package version so that it
             ;; does not go stale on an upgrade.
             (chdir (string-append "../flexbar_v" ,version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2494
19f4554c
BW
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/fraggenescan/"
                                  "FragGeneScan" version ".tar.gz"))
              (sha256
               (base32
                "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (data-dir (string-append prefix
                                             "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Call "rm" and "mv" by the file names found with
                 ;; 'which'.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; This script and the other programs look for the
                 ;; training files in the non-standard location
                 ;; bin/train/XXX; make them use
                 ;; share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 data-dir
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \""
                                 data-dir "/train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" data-dir "\";"))))
             #t))
         (replace 'build
           (lambda _
             (and (zero? (system* "make" "clean"))
                  (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (train-dir (string-append
                                prefix "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bindir)
               (install-file "FragGeneScan" bindir)
               (install-file "FGS_gff.py" bindir)
               (install-file "post_process.pl" bindir)
               (copy-recursively "train" train-dir))))
         (delete 'check)
         ;; In lieu of 'make check', run one of the examples with the
         ;; installed script and check that the output files get created.
         (add-after 'install 'post-install-check
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (runner (string-append prefix "/bin/"
                                           "run_FragGeneScan.pl")))
               (and (zero? (system* runner
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2575
81f3e0c1
BW
(define-public fxtract
  ;; UTIL-COMMIT pins the revision of the ctSkennerton/util repository that
  ;; is copied into the "util" directory of the source tree before building.
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; The upstream archive is only named after the version, so give it
         ;; a name that identifies this package.  It used to be named after
         ;; the 'util' checkout, which is a different source altogether.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Replace the "util" directory of the tarball (removed with
               ;; rmdir, so it has to be empty) with the pinned checkout.
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctstennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2642
5854f685
RW
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Removing the shipped C files forces them to be generated
             ;; afresh from the pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build")
               ;; Compilation needs the numpy headers, so add their
               ;; directory to the include path.
               (("pyx\", \\]")
                (string-append "pyx\", ], include_dirs = ['"
                               (assoc-ref inputs "python-numpy")
                               "/lib/python2.7/site-packages/numpy/core/include/"
                               "']")))
             #t)))))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2692
346a829a
RW
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             ;; Refer to env by its absolute file name in the scripts.
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install every hisat executable and script found in the build
             ;; tree into the "bin" directory of the output, so that they
             ;; end up on PATH.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
               #t))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2758
c684629f
BW
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is "hmmer" followed by the first component
       ;; of the version, then the full version.
       (uri (string-append "http://eddylab.org/software/hmmer"
                           (version-prefix version 1) "/"
                           version "/hmmer-" version ".tar.gz"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    ;; HMMER itself is GPL3+.  The bundled 'easel' library is distributed
    ;; under The Janelia Farm Software License.
    (license (list license:gpl3+
                   (license:non-copyleft
                    "file://easel/LICENSE"
                    "See easel/LICENSE in the distribution.")))))
2787
85652f59
RW
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments
     `(#:python ,python-2))
    ;; Users of htseq as a Python library need numpy as well, hence it is
    ;; propagated.
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (inputs
     `(("python-pysam" ,python2-pysam)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
2813
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "1.129")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/htsjdk/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
       (modules '((guix build utils)))
       ;; Flip "failifexecutionfails" in build.xml so that the build no
       ;; longer depends on git.
       (snippet
        '(substitute* "build.xml"
           (("failifexecutionfails=\"true\"")
            "failifexecutionfails=\"false\"")))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test require Internet access
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; Building already places the jars in the output, so there is
         ;; nothing left for an install phase to do.
         (delete 'install))))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also an number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
2851
e7c09730
RW
(define-public htslib
  (package
    (name "htslib")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test driver refers to /bin/bash; substitute the absolute
             ;; file name of the bash found on PATH.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; The Expat license covers everything except the files under cram/,
    ;; which are released under the modified BSD license.
    (license (list license:expat license:bsd-3))))
2886
c4325f62
RW
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The version string is cut out of the file name of the
             ;; "python" input: the first three of its last five characters
             ;; (presumably MAJOR.MINOR).
             (let* ((python-dir     (assoc-ref inputs "python"))
                    (python-version (string-take
                                     (string-take-right python-dir 5)
                                     3))
                    (site-packages
                     (lambda (name)
                       (string-append (assoc-ref inputs name)
                                      "/lib/python" python-version
                                      "/site-packages")))
                    (path (string-join
                           (map site-packages
                                '("python-scipy"
                                  "python-numpy"
                                  "python-matplotlib"))
                           ":")))
               ;; Let the installed executable find the Python libraries it
               ;; needs by prefixing PYTHONPATH with their directories.
               (wrap-program (string-append (assoc-ref outputs "out")
                                            "/bin/idr")
                 (list "PYTHONPATH" ":" 'prefix (list path))))
             #t)))))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl3+)))
2935
43c565d2
RW
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    ;; The library lives in "out"; the Ruby and Python bindings each get an
    ;; output of their own.
    (outputs '("out" "ruby" "python"))
    (arguments
     `(#:configure-flags
       (list (string-append "--enable-ruby-binding="
                            (assoc-ref %outputs "ruby"))
             (string-append "--enable-python-binding="
                            (assoc-ref %outputs "python")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; The tests are run through either /bin/sh or $SHELL (see
             ;; generator_manager.hpp), so point SHELL at bash.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    ;; From their website: JELLYFISH runs on 64-bit Intel-compatible processors
    (supported-systems '("x86_64-linux"))
    ;; As a whole the work is published under the GPLv3 or later, while
    ;; individual files such as lib/jsoncpp.cpp are under the Expat license.
    (license (list license:gpl3+ license:expat))))
2986
94ff3157
BW
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (sha256
        (base32
         "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
       (patches (search-patches "khmer-use-libraries.patch"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Get rid of the bundled zlib and bzip2.
             (for-each delete-file-recursively
                       '("third-party/zlib"
                         "third-party/bzip2"))
             ;; Swap the bundled seqan for the headers of the "seqan" input.
             (let* ((seqan-all     "third-party/seqan")
                    (seqan-include (string-append seqan-all "/core/include")))
               (delete-file-recursively seqan-all)
               (copy-recursively (string-append (assoc-ref inputs "seqan")
                                                "/include/seqan")
                                 (string-append seqan-include "/seqan")))
             ;; The bundled MurmurHash is kept: 'SMHasher', the canonical
             ;; repository for this code, is unsuitable for providing a
             ;; library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; Testing is simpler once the package is installed, so the regular
         ;; check phase is replaced by one that runs after 'install'.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Make the installed executables and Python modules visible
               ;; to the test suite.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" out "/lib/python"
                        ;; First three of the last five characters of the
                        ;; "python" input's file name.
                        (string-take (string-take-right
                                      (assoc-ref inputs "python") 5)
                                     3)
                        "/site-packages"))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux"))
    (license license:bsd-3)))
3067
d57e6d0f
RW
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (source
     (origin
       (method url-fetch)
       ;; The project is published on PyPI under the name "MACS2".
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32
         "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only compatible with Python 2.7
       #:tests? #f))                    ; no test target
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
3094
41ddebdd
BW
(define-public mafft
  (package
    (name "mafft")
    (version "7.305")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0ziim7g58n3z8gppsa713f5fxprl60ldj3xck186z0n9dpp06i8r"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile and sources patched below live in "core".
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern has to spell
               ;; out the neighbouring program names exactly as the
               ;; replacement does (minus mafft-profile), otherwise it can
               ;; never match and mafft-profile stays in PROGS.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               ;; NOTE(review): the replacement below is ")#" rather than
               ;; "#" like its sibling above -- looks like a typo; confirm
               ;; against the Makefile before changing it.
               (("^rm -f mafft-distance mafft-distance.exe") ")#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Refer to perl, awk and grep by absolute file name.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Wrap every installed file so that the coreutils executables
             ;; are on PATH when it runs.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref %build-inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
8fd790eb 3180
84be3b99
MB
(define-public mash
  (package
    (name "mash")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marbl/mash/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
       (modules '((guix build utils)))
       ;; Drop the bundled copy of kseq.
       ;; TODO: Also delete bundled murmurhash and open bloom filter.
       (snippet
        '(delete-file "src/mash/kseq.h"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:configure-flags
       (list (string-append "--with-capnp="
                            (assoc-ref %build-inputs "capnproto"))
             (string-append "--with-gsl="
                            (assoc-ref %build-inputs "gsl")))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; The bundled kseq.h was removed from the source, so include
             ;; the header under htslib/ instead.
             (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-before 'configure 'autoconf
           (lambda _
             (zero? (system* "autoconf")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Both capnproto and htslib end up statically embedded in the final
       ;; application, which is why their licenses are listed below too.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3236
(define-public metabat
  ;; The released version fails to compile, hence the package is built from
  ;; a git commit.
  (let ((commit "cbdca756993e66ae57e50a27970595dda9cbde1b"))
    (package
      (name "metabat")
      (version (string-append "0.32.4-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://bitbucket.org/berkeleylab/metabat.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "0byia8nsip6zvc4ha0qkxkxxyjf4x7jcvy48q2dvb0pzr989syzr"))
         (patches (search-patches "metabat-remove-compilation-date.patch"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-includes
             (lambda _
               ;; Include the bam/sam headers from samtools/ and kseq.h
               ;; from htslib/ rather than from bam/.
               (substitute* "src/BamUtils.h"
                 (("^#include \"bam/bam\\.h\"")
                  "#include \"samtools/bam.h\"")
                 (("^#include \"bam/sam\\.h\"")
                  "#include \"samtools/sam.h\""))
               (substitute* "src/KseqReader.h"
                 (("^#include \"bam/kseq\\.h\"")
                  "#include \"htslib/kseq.h\""))
               #t))
           (add-after 'unpack 'fix-scons
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((htslib   (assoc-ref inputs "htslib"))
                     (samtools (assoc-ref inputs "samtools")))
                 (substitute* "SConstruct"
                   ;; NOTE(review): the replacement assigns "hitslib_dir"
                   ;; although the matched variable is "htslib_dir" --
                   ;; confirm whether that spelling is intended.
                   (("^htslib_dir = 'samtools'")
                    (string-append "hitslib_dir = '" htslib "'"))
                   ;; NOTE(review): samtools_dir is set to the htslib
                   ;; input, not the samtools input -- confirm.
                   (("^samtools_dir = 'samtools'")
                    (string-append "samtools_dir = '" htslib "'"))
                   (("^findStaticOrShared\\('bam', hts_lib")
                    (string-append "findStaticOrShared('bam', '"
                                   samtools
                                   "/lib'"))
                   ;; Do not distribute README.
                   (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
               #t))
           (delete 'configure)
           (replace 'build
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (mkdir out)
                 (zero? (system* "scons"
                                 (string-append "PREFIX=" out)
                                 (string-append "BOOST_ROOT="
                                                (assoc-ref inputs "boost"))
                                 "install")))))
           ;; The build phase above already takes care of checking and
           ;; installing.
           (delete 'check)
           (delete 'install))))
      (inputs
       `(("zlib" ,zlib)
         ("perl" ,perl)
         ("samtools" ,samtools)
         ("htslib" ,htslib)
         ("boost" ,boost)))
      (native-inputs
       `(("scons" ,scons)))
      (home-page "https://bitbucket.org/berkeleylab/metabat")
      (synopsis
       "Reconstruction of single genomes from complex microbial communities")
      (description
       "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
      (license (license:non-copyleft "file://license.txt"
                                     "See license.txt in the distribution.")))))
8fd790eb 3322
318c0aee
MB
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ctSkennerton/minced/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; The expected output still mentions the previous version;
             ;; update it to the one being built.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin     (string-append (assoc-ref outputs "out") "/bin"))
                    (wrapper (string-append bin "/minced")))
               (install-file "minced.jar" bin)
               ;; Upstream ships a wrapper script that works out its own
               ;; location before running the JAR.  All the file names are
               ;; known here, so write a wrapper of our own and avoid the
               ;; dependency on coreutils.
               (call-with-output-file wrapper
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin "/minced.jar \"$@\"\n")
                    port)))
               (chmod wrapper #o555)))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
3378
ddd82e0e
RW
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Without setuptools the executables are not installed.
           (("distutils.core") "setuptools")
           ;; Compile and link with "gcc" rather than "cc".
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)))  ;for tests
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
3427
324efb88
BW
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (file-name (string-append name "-" version))
       (sha256
        (base32 "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         ;; The 'configure' phase is dropped entirely.
         (delete 'configure)
         ;; No test suite is shipped; the check merely verifies that the
         ;; freshly built binary starts and reports its version.
         (replace 'check
           (lambda _
             (zero? (system* "./muscle" "-version"))))
         ;; Copy the single executable into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    (home-page "http://www.drive5.com/muscle")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
3462
1e44cf8b
BW
(define-public orfm
  (package
    (name "orfm")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are attached to the "v<version>" GitHub release.
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "0vb6d771gl4mix8bwx919x5ayy9pkj44n7ki336nz3rz2rx4c7gk"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
3487
19ee9201
RW
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7, hence the explicit interpreter.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-docutils" ,python2-docutils)
       ("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (propagated-inputs
     `(("python-pyxb" ,python2-pyxb)))
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (license license:bsd-3)))
3517
c61fe02c
RW
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       ;; The release is distributed as a zip archive (hence "unzip" below).
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'remove-bin-directory
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Everything in "bin" is a wrapper for running the module
             ;; tests; once the "check" phase is over they serve no
             ;; purpose, so the whole directory is removed.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (delete-file-recursively bin)
               #t))))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (home-page "https://github.com/PMBio/warpedLMM")
    (license license:asl2.0)))
3563
(define-public pbtranscript-tofu
  ;; The package is built from a pinned commit; the first seven characters of
  ;; its hash are appended to the version string.
  (let ((revision-id "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take revision-id 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit revision-id)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the Cython tarball bundled with the sources.
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; Under the standard flags the install phase tries to produce a
         ;; zipped egg and aborts with 'ZIP does not support timestamps
         ;; before 1980'; these flags avoid that.
         #:configure-flags (list "--single-version-externally-managed"
                                 "--record=pbtranscript-tofu.txt")
         #:phases
         (modify-phases %standard-phases
           ;; The Python package lives two levels below the checkout root.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; This setup.py hack breaks the build with setuptools 18.0 and
           ;; later, so its condition is forced to be false.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (license license:bsd-3))))
3622
024130d2
BW
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32 "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Note on ordering: both phases below are inserted directly after
         ;; 'unpack', so the one added last ('remove-m64-flag') ends up
         ;; running first.  That is why it still addresses the Makefile as
         ;; "src/Makefile".
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         (add-after 'unpack 'remove-m64-flag
           ;; Without "-m64" Prank is built with the correct 'bit-ness' for
           ;; the target, which also makes 32-bit builds possible.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bindir  (string-append out "/bin"))
                    (mandir  (string-append out "/share/man/man1"))
                    ;; "bin" directories of the helper programs that are
                    ;; put on PATH for the installed executable.
                    (helpers (string-join
                              (map (lambda (input)
                                     (string-append
                                      (assoc-ref %build-inputs input)
                                      "/bin"))
                                   '("mafft" "exonerate" "bppsuite"))
                              ":")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,helpers)))
               (install-file "prank.1" mandir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (home-page "http://wasabiapp.org/software/prank/")
    (license license:gpl2+)))
3680
846e3409
RW
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       ;; The download URL has no meaningful file name of its own, so an
       ;; explicit one is derived from the package name and version.
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; does not work with Python 3
       #:tests? #f))      ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the first paragraph must match the number of @item
    ;; entries in the enumeration below (currently seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
 region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
3718
af860475
BW
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/hyattpd/Prodigal/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; INSTALLDIR is passed to make and points at the output's "bin".
       #:make-flags
       (let ((bin (string-append (assoc-ref %outputs "out") "/bin")))
         (list (string-append "INSTALLDIR=" bin)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (home-page "http://prodigal.ornl.gov")
    (license license:gpl3+)))
608dd932 3749
ceb62d54
BW
(define-public roary
  (package
    (name "roary")
    (version "3.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0g0pzcv8y7n2w8q7c9q0a7s2ghkwci6w8smg9mjw4agad5cd7yaw"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The source tree's "bin" and "lib" directories are
             ;; put in front of PATH and PERL5LIB for the test scripts.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Every test file is executed (no short-circuiting); the phase
             ;; succeeds when the list of failing files is empty.
             (null? (filter (lambda (file)
                              (display file)
                              (display "\n")
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every program whose name does not end in "R".  The
               ;; names come from the source tree's "bin" directory and are
               ;; resolved against the output.  Both variables are set in a
               ;; single wrapper.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets its own wrapper, with
               ;; R_LIBS_SITE and a PATH containing coreutils.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r" ,r)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
3864
608dd932
BW
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.9")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1pv8p2fy67y21a9y4cm7xpvxqjwz2v4201flfjshdq1p8j52rqf7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests.
       ;; Build with the generic HYBRID Makefile rather than the SSE or AVX
       ;; ones; it produces the "raxmlHPC-HYBRID" executable installed
       ;; below.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Provide the program under the short name "raxml" as well.
               ;; The link must be created inside the output's "bin"
               ;; directory; a bare "raxml" would land in the build
               ;; directory and never reach the store.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    (license license:gpl2+)))
af860475 3904
66e3eff1
RW
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                           version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The bundled copy of boost is deleted; the "boost" input is
           ;; used in its place.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  This phase instead edits the
         ;; Makefile so that the bundled samtools library is not built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         ;; Install every file matching "rsem-.*" into "bin" and the Perl
         ;; helper module into the site_perl directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bindir  (string-append out "/bin"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         ;; The listed programs are wrapped so that PERL5LIB includes the
         ;; directory holding "rsem_perl_utils.pm".
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perldir))))
                         (list "rsem-plot-transcript-wiggles"
                               "rsem-calculate-expression"
                               "rsem-generate-ngvector"
                               "rsem-run-ebseq"
                               "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r" ,r)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (license license:gpl3+)))
3982
8622a072
RW
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The bundled pysam is deleted; the "python-pysam" input is
           ;; used in its place.
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; The outdated "distribute" module must not be required.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Mark pysam as available so that the (now deleted) bundled
             ;; copy is not used.
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (home-page "http://rseqc.sourceforge.net/")
    (license license:gpl3+)))
4024
ec946638
RW
(define-public seek
  ;; No release tarballs are published.  The installation instructions at
  ;; http://seek.princeton.edu/installation.jsp identify the latest stable
  ;; release by the changeset ID used here.
  (let ((hg-changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." hg-changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset hg-changeset)))
         (sha256
          (base32 "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of "tools/" that are built and installed in
         ;; addition to the regular build.
         (let ((tool-dirs (list "SeekMiner"
                                "SeekEvaluator"
                                "SeekPrep"
                                "Distancer"
                                "Data2DB"
                                "PCL2Bin")))
           (modify-phases %standard-phases
             ;; Run the "gen_auto" script before 'configure'.
             (add-before 'configure 'bootstrap
               (lambda _
                 (zero? (system* "bash" "gen_auto"))))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (every (lambda (tool)
                          (with-directory-excursion
                              (string-append "tools/" tool)
                            (zero? (apply system* "make" make-flags))))
                        tool-dirs)))
             ;; Succeeds only if "make install" succeeds in every tool
             ;; directory; nothing is run after the first failure.
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (every (lambda (tool)
                          (with-directory-excursion
                              (string-append "tools/" tool)
                            (zero? (apply system* "make"
                                          (append make-flags
                                                  (list "install"))))))
                        tool-dirs)))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (home-page "http://seek.princeton.edu")
      (license license:cc-by3.0))))
4093
4e10a221
RW
(define-public samtools
  (package
    (name "samtools")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "0znnnxc467jbf1as2dpskrjhfh8mbll760j6w6rdkwlwbqsp8gbc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags (list "--with-ncurses")
       ;; Phase names are relied upon by packages inheriting from this one
       ;; (e.g. 'patch-tests' is deleted by samtools-0.1); keep them stable.
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash")))
             #t))
         ;; Install the static library in addition to what "make install"
         ;; provides.
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
               (install-file "libbam.a" lib)
               #t)))
         ;; Install every header found at the top level of the build
         ;; directory into "include/samtools/".
         (add-after 'install-library 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((include (string-append (assoc-ref outputs "out")
                                           "/include/samtools/")))
               (for-each (lambda (file)
                           (install-file file include))
                         (scandir "." (lambda (name)
                                        (string-match "\\.h$" name))))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda 4151
0b84a0aa
RW
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output formats
  ;; differ greatly from those of samtools 1.x, and many bioinformatics
  ;; pipelines still depend on it.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ;; Prepend the curses library flag to the inherited make flags.
           ((#:make-flags inherited-flags)
            `(cons "LIBCURSES=-lncurses" ,inherited-flags))
           ((#:phases inherited-phases)
            `(modify-phases ,inherited-phases
               ;; The inherited 'patch-tests' and 'configure' phases are
               ;; removed.
               (delete 'patch-tests)
               (delete 'configure)
               ;; Install the "samtools" executable by hand.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (install-file "samtools"
                                 (string-append (assoc-ref outputs "out")
                                                "/bin"))))))))))) 
0b84a0aa 4182
(define-public mosaik
  (let ((revision-id "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; Upstream publishes neither release tarballs nor tags, so a
         ;; specific commit is fetched.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit revision-id)))
         (file-name (string-append name "-" version))
         (sha256
          (base32 "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags '("CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, enter the "src"
           ;; directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy the contents of "../bin" (relative to "src") into the
           ;; output's "bin" directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (target (string-append out "/bin")))
                 (mkdir-p target)
                 (copy-recursively "../bin" target)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      (home-page "https://code.google.com/p/mosaik-aligner/")
      ;; MOSAIK itself is GPLv2+; the following third-party code is in the
      ;; public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
4229
282c5087
RW
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.2.5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "04y1fsmdnb5y86m3gg6f5g9wcscr6r25n7m8mdlcxy0i2q6w6cia"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       ;; The 'enter-dir' phase name is part of this package's contract:
       ;; inheriting packages replace it to build a different
       ;; sub-directory of the same source tree.
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so it is invoked by hand with
               ;; only the options it understands.
               (zero? (system* "./configure"
                               (string-append "--build-prefix=" (getcwd) "/build")
                               (string-append "--prefix=" out)))))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
4275
;; Java bindings for NGS.  Inherits everything from 'ngs-sdk' and only
;; redirects the 'enter-dir' phase to the "ngs-java" sub-directory.
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _ (chdir "ngs-java") #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
4293
75dd2424
RW
;; NCBI-VDB database engine.  The 'configure' phase patches the upstream
;; build files and runs the project's own 'configure' script; an extra phase
;; installs the interface libraries and headers after 'install'.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0x1cg1x8vy0yjlkp0snc1533zcjhxqzqsaiwqk598n7vvw37n8lf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Prepend "-msse2" to the value assigned to CFLAGS.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-ngs-java-prefix="
                                      (assoc-ref inputs "java-ngs"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir (string-append out "/ilib"))
               (copy-recursively
                (string-append "build/ncbi-vdb/linux/gcc/"
                               ,(system->linux-architecture
                                 (or (%current-target-system)
                                     (%current-system)))
                               "/rel/ilib")
                (string-append out "/ilib"))
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'copy-recursively'.
               #t))))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs
     `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
4385
cc6ed477
RW
;; PLINK 1.07, built straight from its Makefile (there is no 'configure'
;; script) and linked against the "lapack" input.
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; no "configure" script
         (replace 'install
           ;; The Makefile is not used for installation; copy the single
           ;; "plink" executable by hand.
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "plink"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
4438
c6a24d6e
RW
;; Smithlab C++ helper library, packaged from a pinned git commit.  The
;; 'install' phase copies the object files and the ".hpp" headers by hand.
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ; no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'use-samtools-headers
             ;; Point the SAM wrappers at the "samtools/" header directory.
             (lambda _
               (substitute* '("SAM.cpp" "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (libdir (string-append prefix "/lib"))
                      (incdir (string-append prefix
                                             "/include/smithlab-cpp"))
                      ;; Install every file under "." whose name matches
                      ;; REGEXP into DIRECTORY.
                      (install-matching
                       (lambda (regexp directory)
                         (for-each (lambda (file)
                                     (install-file file directory))
                                   (find-files "." regexp)))))
                 (mkdir-p libdir)
                 (mkdir-p incdir)
                 (install-matching "\\.o$" libdir)
                 (install-matching "\\.hpp$" incdir))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
4491
56e373ef
RW
;; preseq, built against the packaged samtools and smithlab-cpp instead of
;; the samtools copy bundled in the release tarball.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/"
                           "preseq/archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
       (modules '((guix build utils)))
       ;; Remove bundled samtools.
       (snippet
        '(delete-file-recursively "samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       (let ((samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               ;; The $(...) references are left for make to expand.
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
4540
9ded1457
BW
;; Screed, fetched from PyPI; its tests are run with nose rather than the
;; default 'check' phase.
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Append the current directory to PYTHONPATH, then run every
             ;; test not tagged "known_failing".
             (let ((search-path (string-append (getenv "PYTHONPATH") ":.")))
               (setenv "PYTHONPATH" search-path)
               (zero? (system* "nosetests"
                               "--attr" "!known_failing"))))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "http://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
4571
;; Python 2 variant of 'python-screed' with python2-setuptools prepended to
;; its native inputs.
(define-public python2-screed
  (let ((python2-variant
         (package-with-python2 (strip-python2-variant python-screed))))
    (package
      (inherit python2-variant)
      (native-inputs
       (cons `("python2-setuptools" ,python2-setuptools)
             (package-native-inputs python2-variant))))))
4578
51c64999
RW
;; SRA Toolkit, built against the installed 'ncbi-vdb' package rather than
;; against an ncbi-vdb build tree.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/sra-tools/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13paw7bq6y47d2pl0ac5gpgcqp1xsy1g7v1fwysm3hr8lb2dck17"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       ;; The library sub-directory is chosen on the host side from the
       ;; system the package is built for.
       (list (string-append "VDB_LIBDIR="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            ,(if (string-prefix? "x86_64"
                                                 (or (%current-target-system)
                                                     (%current-system)))
                                 "/lib64"
                                 "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources
             ;; and raw build output of ncbi-vdb, including files that are
             ;; not installed.  Since we are building against an installed
             ;; version of ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (let ((ncbi-vdb (assoc-ref inputs "ncbi-vdb")))
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" ncbi-vdb)
                       (string-append "--with-ncbi-vdb-build=" ncbi-vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
4665
d3517eda
RW
;; SeqAn library.  Nothing is compiled: the builder unpacks the release
;; tarball and copies "include" to the "out" output and "share" to "doc".
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://packages.seqan.de/seqan-library/"
                           "seqan-library-" version ".tar.bz2"))
       (sha256
        (base32
         "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar  (assoc-ref %build-inputs "tar"))
               (bzip (assoc-ref %build-inputs "bzip2"))
               (out  (assoc-ref %outputs "out"))
               (doc  (assoc-ref %outputs "doc")))
           ;; tar needs bzip2 on PATH to decompress the ".tar.bz2" source.
           (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
           ;; Stop right away when unpacking fails instead of carrying on
           ;; with a missing or partial source tree.
           (unless (zero? (system* "tar" "xvf"
                                   (assoc-ref %build-inputs "source")))
             (error "failed to unpack the source tarball"))
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))
           ;; Make the builder's result an explicit success value.
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5 4708
d708b7a9
BW
;; Seqmagick, built with Python 2 and tested with nose.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/s/seqmagick/seqmagick-"
             version ".tar.gz"))
       (sha256
        (base32
         "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
    (build-system python-build-system)
    (arguments
     ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Current test in setup.py does not work as of 0.6.1,
         ;; so use nose to run tests instead for now. See
         ;; https://github.com/fhcrc/seqmagick/issues/55
         (replace 'check
           (lambda _
             (zero? (system* "nosetests")))))))
    (inputs
     ;; biopython-1.66 is required due to
     ;; https://github.com/fhcrc/seqmagick/issues/59
     ;; When that issue is resolved the 'python2-biopython-1.66' package
     ;; should be removed.
     `(("python-biopython" ,python2-biopython-1.66)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
4749
66daf78c
BW
;; Seqtk, built from its Makefile (no 'configure' script).  The upstream
;; tarball has no test suite, so 'check' only runs the freshly built binary.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/seqtk/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove extraneous header files, as is done in the seqtk
           ;; master branch.
           (for-each delete-file
                     (list "ksort.h" "kstring.h" "kvec.h"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.
           (lambda _ (zero? (system* "./seqtk" "seq"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               ;; Return an explicit success value, like the other phases
               ;; in this file, rather than whatever 'install-file' yields.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
4792
5f7e17be
BW
;; SNAP aligner, built from its Makefile; the "unit_tests" binary serves as
;; the test suite and both executables are installed by hand.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/amplab/snap/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           (lambda _
             (zero? (system* "./unit_tests"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("snap-aligner" "SNAPCommand"))
               #t))))))
    (native-inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
4834
bcadaf00
BW
;; SortMeRNA.  The two executables go to the "out" output and the FASTA
;; files found under "rRNA_databases" go to the separate "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ; for binaries
               "db"))                   ; for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin"))
                   (dbdir  (string-append (assoc-ref outputs "db")
                                          "/share/sortmerna/rRNA_databases")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               (for-each (lambda (database)
                           (install-file database dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    (license license:lgpl3)))
4879
ce7155d5
RW
;; STAR RNA-seq aligner, built from the "source" sub-directory against the
;; packaged htslib; pre-built binaries and the bundled htslib are removed
;; from the source.
(define-public star
  (package
    (name "star")
    (version "2.5.2a")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0xjlsm4p9flln111hv4xx7xy94c2nl53zvdvbk9winmiradjsdra"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Remove pre-built binaries and bundled htslib sources.
           (for-each delete-file-recursively
                     '("bin/MacOSX_x86_64"
                       "bin/Linux_x86_64"
                       "source/htslib"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop "htslib" from the prerequisites of Depend.list.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ rule)
                rule))
             ;; Turn quoted includes of the bundled copy into <...> includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ file)
                (string-append "#include <" file ">")))
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+.h)\"" _ file)
                (string-append "<" file ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "STAR"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (native-inputs
     `(("vim" ,vim)))                   ; for xxd
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
de07c0db 4952
dbf4ed7c
RW
;; Subread tool kit, built from "src" with Makefile.Linux (there is no
;; 'configure' script); the programs left in "../bin" are installed by hand.
(define-public subread
  (package
    (name "subread")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/subread/subread-"
                           version "/subread-" version "-source.tar.gz"))
       (sha256
        (base32
         "0gn5zhbvllks0mmdg3qlmsbg91p2mpdc2wixwfqpi85yzfrh8hcy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that
       ;; x86_64 flags are only added when the build target is an x86_64
       ;; system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               (string-append "CCFLAGS="
                              (string-join
                               (if (string-prefix? "x86_64" system)
                                   (append flags flags64)
                                   flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; no "configure" script
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin)
               ;; Return an explicit success value rather than the
               ;; unspecified result of 'copy-recursively'.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
5007
d15d981e
RW
;; StringTie, built from its Makefile against the packaged samtools; the
;; bundled "samtools-0.1.18" directory is removed from the source.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32
         "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no test suite
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; no configure script
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; The "stringtie" target must not depend on the bundled
             ;; libbam.a, which no longer exists.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the "samtools/" directory.
             (substitute* '("gclib/GBam.h" "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ stem)
                (string-append "#include <samtools/" stem ".h>")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "stringtie"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
5062
de07c0db
RW
;; VCFtools, built with the standard GNU phases; only the make flags are
;; overridden to set the installation prefix, the man page directory and
;; CFLAGS.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32
         "10l5c07z9p4i9pr4gl54b2c9h6ndhqlbq1rashg2zcgwkbfrkmvn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
9c38b540 5098
35aa90a1
RW
;; Infernal: RNA structure/sequence similarity search using covariance
;; models.  Restricted to x86 systems (see the comment on
;; `supported-systems' below).
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32
         "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    (home-page "http://eddylab.org/infernal/")
    (license license:bsd-3)))
5126
7b3df1e5
BW
;; vegan (CRAN): community ecology functions.  The CRAN tarball ships
;; without tests, so an upstream commit that deleted them is fetched as a
;; patch and applied in reverse after unpacking.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.4-1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "vegan" version))
       (sha256
        (base32
         "0i0c7rc0nzgbysd1nlxzxd2rvy75qcnw3yc7nggzqjzzj5d7yzsd"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'revert-test-deletion
           ;; The distributed sources do not include tests with the CRAN
           ;; package.  Here we revert the commit
           ;; `591d0e8ba1deaaf82445474ec6619c0b43db4e63' which deletes these
           ;; tests.  There are plans to not delete tests in future as
           ;; documented at https://github.com/vegandevs/vegan/issues/181.
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((deletion-patch
                    (assoc-ref inputs "r-vegan-delete-tests-patch")))
               ;; "-R" applies the patch in reverse; the phase succeeds only
               ;; when `patch' exits with status 0.
               (zero? (system* "patch" "-R" "-p1" "-i" deletion-patch))))))))
    (native-inputs
     `(("gfortran" ,gfortran)
       ("r-knitr" ,r-knitr)
       ("r-vegan-delete-tests-patch"
        ,(origin
           (method url-fetch)
           (uri (string-append
                 "https://github.com/vegandevs/vegan/commit/"
                 "591d0e8ba1deaaf82445474ec6619c0b43db4e63.patch"))
           (sha256
            (base32
             "0b1bi7y4jjdl3ph721vm9apm51dr2z9piwvhy4355sf2b4kyyj5a"))))))
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ("r-lattice" ,r-lattice)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (license license:gpl2+)))
5177
8c6de588
RW
;; annotate (Bioconductor): annotation support for microarrays.
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "annotate" version))
       (sha256
        (base32
         "1fd2csq7dcs2gwndgwdx2nwkymz8gsmlnqqzv3p0vjjsvvq5n2a8"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (home-page
     "http://bioconductor.org/packages/annotate")
    (synopsis "Annotation for microarrays")
    ;; Spelling fixed: the description previously read "enviroments".
    (description "This package provides R environments for the annotation of
microarrays.")
    (license license:artistic2.0)))
5203
07a664cd
RW
;; geneplotter (Bioconductor): plotting functions for genomic data.
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "geneplotter" version))
              (sha256
               (base32
                "1p6yvxi243irhjxwm97hp73abhwampj0myyf8z00ij166674pc7h"))))
    (build-system r-build-system)
    ;; Propagated so the listed R packages are installed alongside this one.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (home-page "http://bioconductor.org/packages/geneplotter")
    (license license:artistic2.0)))
5228
2301fd3e
RW
;; genefilter (Bioconductor): filtering of genes from high-throughput
;; experiments.  gfortran is a build-time-only input.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "1vzgciqd09csqcw9qync8blsv51ylrd86a65iadgyy6j26g01fwd"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (home-page "http://bioconductor.org/packages/genefilter")
    (license license:artistic2.0)))
5254
ad34f0ac
RW
;; DESeq2 (Bioconductor): differential expression analysis of count data.
;; The upstream name is mixed-case, hence the `upstream-name' property.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq2" version))
       (sha256
        (base32
         "0kq06jy4xg5ii3a9l62f17kirsfx0gsiwq6mhiy985cqzpdn893g"))))
    (properties `((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-against-armadillo
           (lambda _
             ;; Insert "-larmadillo" directly after "PKG_LIBS =" so the
             ;; package's shared object is linked against Armadillo.
             (substitute* "src/Makevars"
               (("PKG_LIBS =" prefix)
                (string-append prefix "-larmadillo")))
             ;; Report success explicitly rather than relying on the
             ;; unspecified value of `substitute*', as the other custom
             ;; phases in this file do.
             #t)))))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
5299
e8163773
RW
;; AnnotationForge (Bioconductor): code for building annotation database
;; packages intended for use with AnnotationDbi.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "02msyb9p3hywrryx00zpjkjl126mrv827i1ah1092s0cplm6xxvf"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (home-page "http://bioconductor.org/packages/AnnotationForge")
    (license license:artistic2.0)))
5329
cd9e7dc7
RW
;; RBGL (Bioconductor): R interface to the Boost graph algorithms.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.50.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "1q14m8w6ih56v680kf3d9wh1qbgp7af33kz3cxafdf1vvzx9km08"))))
    ;; Upstream spells the package name in upper case.
    (properties `((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (home-page "http://www.bioconductor.org/packages/RBGL")
    (license license:artistic2.0)))
5350
ad740ff8
RW
;; GSEABase (Bioconductor): classes and methods for gene set enrichment
;; analysis.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "0l2x7yj7lfb0m2dmsav5ib026dikpgl4crdckrnj776yy08lgxpj"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (home-page "http://bioconductor.org/packages/GSEABase")
    (license license:artistic2.0)))
5377
1a1931f7
RW
;; Category (Bioconductor): tools for category analysis.
(define-public r-category
  (package
    (name "r-category")
    (version "2.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "16ncwz7b4y48k0p3fvbrbmvf7nfz63li9ysgcl8kp9kl4hg7llng"))))
    ;; Upstream capitalizes the package name.
    (properties `((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-rsqlite" ,r-rsqlite)))
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (home-page "http://bioconductor.org/packages/Category")
    (license license:artistic2.0)))
5408
89f40c5e
RW
;; GOstats (Bioconductor): tools for working with GO and microarray data.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "0g2czm94zhzx92z7y2r4mjfxhwml7bhab2db6820ks8nkw1zvr9n"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)))
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (home-page "http://bioconductor.org/packages/GOstats")
    (license license:artistic2.0)))
5438
cb99d457
RW
;; ShortRead (Bioconductor): FASTQ input, sampling, filtering and quality
;; assessment.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "0mjdlg92x5qw4x2djc4dv5lxwl7ai6ix56nnf86zr07jk8vc7yls"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (home-page "http://bioconductor.org/packages/ShortRead")
    (license license:artistic2.0)))
5480
7f903d73
RW
;; systemPipeR (Bioconductor): workflow and reporting environment for next
;; generation sequencing analyses.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.8.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "0hyi841w8fm2yzpm6lwqi3jz5kc8ny8dy5p29dxynzaw5bpjw56d"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (home-page "https://github.com/tgirke/systemPipeR")
    (license license:artistic2.0)))
5526
684f29bd
RW
;; groHMM (Bioconductor): analysis pipeline for GRO-seq data.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "0d91nyhqbi5hv3mgmr2z0g29wg2md26g0hyv5mgapmz20cd9zi4y"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (license license:gpl3+)))
5552
f3cfe451
RW
;; TxDb.Hsapiens.UCSC.hg19.knownGene: Bioconductor annotation data package
;; exposing the UCSC hg19 "knownGene" track as a TxDb object.
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "http://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "TxDb.Hsapiens.UCSC.hg19.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     `((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (home-page
     "http://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (license license:artistic2.0)))
5584
a2950fa4
BW
;; VSEARCH: sequence search and clustering tool.  The origin snippet strips
;; the bundled CityHash sources and the '-mtune=native' flag, and rewires
;; the build to use the cityhash input instead; the build system is then
;; regenerated with autoreconf before configuring.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1r8fk3whkil348y5hfsd4r56qjmchhq4nxm6s7ra5rlisw0mf9fy"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled cityhash and '-mtune=native'.
           (substitute* "src/Makefile.am"
             (("^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash \
-O3 -mtune=native -Wall -Wsign-compare")
              "AM_CXXFLAGS=-lcityhash -O3 -Wall -Wsign-compare")
             (("^__top_builddir__bin_vsearch_SOURCES = city.h \\\\")
              "__top_builddir__bin_vsearch_SOURCES = \\")
             (("^city.h \\\\") "\\")
             (("^citycrc.h \\\\") "\\")
             (("^libcityhash_a.*") "")
             (("noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a \
libcityhash.a")
              "noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a")
             (("__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
libcpu_sse2.a libcityhash.a")
              "__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
libcpu_sse2.a -lcityhash"))
           ;; Include the headers from the search path rather than from
           ;; the source directory.
           (substitute* "src/vsearch.h"
             (("^\\#include \"city.h\"") "#include <city.h>")
             (("^\\#include \"citycrc.h\"") "#include <citycrc.h>"))
           (for-each delete-file
                     '("src/city.h" "src/citycrc.h" "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'autogen
           (lambda _
             ;; Succeeds only when autoreconf exits with status 0.
             (zero? (system* "autoreconf" "-vif")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
5654
07837874
RW
;; ParDRe: MPI-based tool for removing duplicate DNA reads.  The tarball has
;; no configure script use and no tests, and the install phase is replaced
;; by a manual copy of the "ParDRe" executable.
(define-public pardre
  (package
    (name "pardre")
    (version "1.1.5")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0zkyjzv4s8q2h5npalhirbk17r5b1h0n2a42mh7njzlf047h9bhy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (install-file "ParDRe" bindir)
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
5695
e4a44a6a
BW
;; bio-kseq (RubyGems): Ruby bindings for the kseq.h FASTA/FASTQ parser.
;; Tests are run through the "spec" target.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    (arguments
     `(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
5723
9c38b540
PP
;; bio-locus (RubyGems): tabix-like tool for querying genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
edb15985 5747
b2bddb07
PP
;; bio-blastxmlparser (RubyGems): BLAST XML parser usable as a library or
;; as a command-line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is a test framework, so it is declared as a native input, as
    ;; the other Ruby packages in this file do.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
5771
edb15985
PP
;; BioRuby, published on RubyGems under the name "bio".  Its functional
;; tests refer to a few programs by hard-coded name or /bin path; a phase
;; before 'build rewrites those references to the absolute file names that
;; `which' finds in the build environment.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.0")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "01k2fyjl5fpx4zn8g6gqiqvsg2j1fgixrs9p03vzxckynxdq3wmc"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which))) ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; One pass over the file; the clauses are applied to each
             ;; line in the order written, which matches the result of the
             ;; three separate passes used previously.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; "/bin/echo" occurs in the test itself and in a helper
             ;; script.  (A stray "," between the two file names has been
             ;; removed; it only worked because the surrounding quasiquote
             ;; treated it as an unquote of a string literal.)
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
a5002ae7 5814
9fba89e8
RW
;; ACSNMineR (CRAN): gene set enrichment/depletion analysis based on maps
;; from the Atlas of Cancer Signalling Networks or user-imported maps.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "ACSNMineR" version))
              (sha256
               (base32
                "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    ;; HTTPS, matching the other CRAN home page in this file.
    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (license license:gpl2+)))
5840
d29b25c4
RW
;; BiocGenerics (Bioconductor): S4 generic functions used by many
;; Bioconductor packages.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32
         "06szdz7dfs1iyv5zdl4fjzad18nnf1zf3wvglc6c6yd9mrqlf7vk"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "BiocGenerics")))
    (build-system r-build-system)
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (home-page "http://bioconductor.org/packages/BiocGenerics")
    (license license:artistic2.0)))
5860
eb24341f
RJ
;; BiocInstaller (Bioconductor): installer/updater for R packages.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32
         "0y1y5wmy6lzjqx3hdg15n91d417ccjj8dbvdkhmp99bs5aijwcpn"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "BiocInstaller")))
    (build-system r-build-system)
    (synopsis "Install Bioconductor packages")
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (home-page "http://bioconductor.org/packages/BiocInstaller")
    (license license:artistic2.0)))
5879
207ce8fb
RJ
;; biocViews (Bioconductor): generates HTML pages categorizing the packages
;; of a Bioconductor repository by controlled-vocabulary keywords.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32
         "07rjk10b91pkriyq297w86199r2d3sfji3ggs9mq2gyalsa8y4b6"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-knitr" ,r-knitr)
       ("r-runit" ,r-runit)))
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (home-page "http://bioconductor.org/packages/biocViews")
    (license license:artistic2.0)))
5907
99df12cd
RJ
;; BiocStyle (Bioconductor): standard formatting styles for Bioconductor
;; PDF and HTML documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32
         "0qbk23fz8cn260isd9xlh9lxfj4adar6iqzai01c4kz0p31f45za"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (synopsis "Bioconductor formatting styles")
    (description "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (home-page "http://bioconductor.org/packages/BiocStyle")
    (license license:artistic2.0)))
5927
4644644a
RJ
;; Public package `r-bioccheck': the Bioconductor package "BiocCheck" at
;; version 1.10.0, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  An extra phase after `install' symlinks the two scripts
;; shipped under site-library/BiocCheck/script into the output's bin
;; directory.
5928(define-public r-bioccheck
5929 (package
5930 (name "r-bioccheck")
5931 (version "1.10.0")
5932 (source (origin
5933 (method url-fetch)
5934 (uri (bioconductor-uri "BiocCheck" version))
5935 (sha256
5936 (base32
5937 "1rfy37xg1nc2cmgbclvzsi7sgmdcdjiahsx9crgx3yaw7kxgiack"))))
;; Record the upstream spelling, which differs from the Guix package name.
5938 (properties
5939 `((upstream-name . "BiocCheck")))
5940 (build-system r-build-system)
5941 (arguments
5942 '(#:phases
5943 (modify-phases %standard-phases
5944 ;; This package can be used by calling BiocCheck(<package>) from
5945 ;; within R, or by running R CMD BiocCheck <package>. This phase
5946 ;; makes sure the latter works. For this to work, the BiocCheck
5947 ;; script must be somewhere on the PATH (not the R bin directory).
5948 (add-after 'install 'install-bioccheck-subcommand
5949 (lambda* (#:key outputs #:allow-other-keys)
5950 (let* ((out (assoc-ref outputs "out"))
5951 (dest-dir (string-append out "/bin"))
;; No trailing slash here: the file names appended below already start
;; with "/", so a trailing one would produce "script//..." link targets.
5952 (script-dir
5953 (string-append out "/site-library/BiocCheck/script")))
5954 (mkdir-p dest-dir)
5955 (symlink (string-append script-dir "/checkBadDeps.R")
5956 (string-append dest-dir "/checkBadDeps.R"))
5957 (symlink (string-append script-dir "/BiocCheck")
5958 (string-append dest-dir "/BiocCheck")))
5959 #t)))))
5960 (native-inputs
5961 `(("which" ,which)))
;; R packages propagated alongside this one.
5962 (propagated-inputs
5963 `(("r-graph" ,r-graph)
5964 ("r-knitr" ,r-knitr)
5965 ("r-httr" ,r-httr)
5966 ("r-optparse" ,r-optparse)
5967 ("r-devtools" ,r-devtools)
5968 ("r-biocinstaller" ,r-biocinstaller)
5969 ("r-biocviews" ,r-biocviews)))
5970 (home-page "http://bioconductor.org/packages/BiocCheck")
5971 (synopsis "Executes Bioconductor-specific package checks")
5972 (description "This package contains tools to perform additional quality
5973checks on R packages that are to be submitted to the Bioconductor repository.")
5974 (license license:artistic2.0)))
5975
2acaaee5
RJ
;; Public package `r-getopt': the CRAN package "getopt" at version 1.20.0.
;; The tarball URI is computed by `cran-uri' and the download is checked
;; against the base32 sha256 hash below.  No inputs are declared.
5976(define-public r-getopt
5977 (package
5978 (name "r-getopt")
5979 (version "1.20.0")
5980 (source
5981 (origin
5982 (method url-fetch)
5983 (uri (cran-uri "getopt" version))
5984 (sha256
5985 (base32
5986 "00f57vgnzmg7cz80rjmjz1556xqcmx8nhrlbbhaq4w7gl2ibl87r"))))
5987 (build-system r-build-system)
5988 (home-page "https://github.com/trevorld/getopt")
5989 (synopsis "Command-line option processor for R")
5990 (description
5991 "This package is designed to be used with Rscript to write shebang
5992scripts that accept short and long options. Many users will prefer to
5993use the packages @code{optparse} or @code{argparse} which add extra
5994features like automatically generated help options and usage texts,
5995support for default values, positional argument support, etc.")
5996 (license license:gpl2+)))
5997
c79ad57a
RJ
;; Public package `r-optparse': the CRAN package "optparse" at version 1.3.2,
;; fetched via `cran-uri' and built with `r-build-system'.  Its only declared
;; dependency is `r-getopt', which is propagated.
5998(define-public r-optparse
5999 (package
6000 (name "r-optparse")
6001 (version "1.3.2")
6002 (source
6003 (origin
6004 (method url-fetch)
6005 (uri (cran-uri "optparse" version))
6006 (sha256
6007 (base32
6008 "1g8as89r91xxi5j5azsd6vrfrhg84mnfx2683j7pacdp8s33radw"))))
6009 (build-system r-build-system)
6010 (propagated-inputs
6011 `(("r-getopt" ,r-getopt)))
6012 (home-page
6013 "https://github.com/trevorld/optparse")
6014 (synopsis "Command line option parser")
6015 (description
6016 "This package provides a command line parser inspired by Python's
6017@code{optparse} library to be used with Rscript to write shebang scripts
6018that accept short and long options.")
6019 (license license:gpl2+)))
6020
247d498a
RJ
;; Public package `r-dnacopy': the Bioconductor package "DNAcopy" at version
;; 1.48.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; `gfortran' is the only declared input.
6021(define-public r-dnacopy
6022 (package
6023 (name "r-dnacopy")
22c5d736 6024 (version "1.48.0")
247d498a
RJ
6025 (source (origin
6026 (method url-fetch)
6027 (uri (bioconductor-uri "DNAcopy" version))
6028 (sha256
6029 (base32
22c5d736 6030 "1idyvfvy7xx8k9vk00y4k3819qmip8iqm809j3vpxabmsn7r9zyh"))))
247d498a
RJ
;; Record the upstream spelling, which differs from the Guix package name.
6031 (properties
6032 `((upstream-name . "DNAcopy")))
6033 (build-system r-build-system)
;; NOTE(review): presumably the package contains Fortran sources that need
;; this compiler -- confirm against the upstream tarball.
6034 (inputs
6035 `(("gfortran" ,gfortran)))
5697fdc3 6036 (home-page "https://bioconductor.org/packages/DNAcopy")
247d498a
RJ
6037 (synopsis "Implementation of a circular binary segmentation algorithm")
6038 (description "This package implements the circular binary segmentation (CBS)
6039algorithm to segment DNA copy number data and identify genomic regions with
6040abnormal copy number.")
6041 (license license:gpl2+)))
6042
7485129e
RW
;; Public package `r-s4vectors': the Bioconductor package "S4Vectors" at
;; version 0.12.0, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  It propagates `r-biocgenerics'.
6043(define-public r-s4vectors
6044 (package
6045 (name "r-s4vectors")
b120c5f3 6046 (version "0.12.0")
7485129e
RW
6047 (source (origin
6048 (method url-fetch)
6049 (uri (bioconductor-uri "S4Vectors" version))
6050 (sha256
6051 (base32
b120c5f3 6052 "0m0npc0vhmcwcxws7v2f8k4hvvrjvnlrsr94klxf4a8m4xw2xzzk"))))
;; Record the upstream spelling, which differs from the Guix package name.
7485129e 6053 (properties
1d216b6e 6054 `((upstream-name . "S4Vectors")))
7485129e
RW
6055 (build-system r-build-system)
6056 (propagated-inputs
6057 `(("r-biocgenerics" ,r-biocgenerics)))
6058 (home-page "http://bioconductor.org/packages/S4Vectors")
6059 (synopsis "S4 implementation of vectors and lists")
6060 (description
6061 "The S4Vectors package defines the @code{Vector} and @code{List} virtual
6062classes and a set of generic functions that extend the semantic of ordinary
6063vectors and lists in R. Package developers can easily implement vector-like
6064or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
6065In addition, a few low-level concrete subclasses of general interest (e.g.
6066@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
6067S4Vectors package itself.")
6068 (license license:artistic2.0)))
6069
274da826
RW
;; Public package `r-seqinr': the CRAN package "seqinr" at version 3.3-3,
;; fetched via `cran-uri' and built with `r-build-system'.  It propagates
;; `r-ade4' and `r-segmented' and takes `zlib' as a plain input.
6070(define-public r-seqinr
6071 (package
6072 (name "r-seqinr")
a46cc723 6073 (version "3.3-3")
274da826
RW
6074 (source
6075 (origin
6076 (method url-fetch)
6077 (uri (cran-uri "seqinr" version))
6078 (sha256
6079 (base32
a46cc723 6080 "0rk4yba8km26c0rh1f4h474zsb5n6kjmqsi55bnzr6p8pymp18hj"))))
274da826
RW
6081 (build-system r-build-system)
6082 (propagated-inputs
3b851cd4
RW
6083 `(("r-ade4" ,r-ade4)
6084 ("r-segmented" ,r-segmented)))
274da826
RW
6085 (inputs
6086 `(("zlib" ,zlib)))
6087 (home-page "http://seqinr.r-forge.r-project.org/")
6088 (synopsis "Biological sequences retrieval and analysis")
6089 (description
6090 "This package provides tools for exploratory data analysis and data
6091visualization of biological sequence (DNA and protein) data. It also includes
6092utilities for sequence data management under the ACNUC system.")
6093 (license license:gpl2+)))
6094
78addcb0
RW
;; Public package `r-iranges': the Bioconductor package "IRanges" at version
;; 2.8.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; It propagates `r-biocgenerics' and `r-s4vectors'.
6095(define-public r-iranges
6096 (package
6097 (name "r-iranges")
7f354daf 6098 (version "2.8.0")
78addcb0
RW
6099 (source (origin
6100 (method url-fetch)
6101 (uri (bioconductor-uri "IRanges" version))
6102 (sha256
6103 (base32
7f354daf 6104 "0cdl1sfd3cvf93lnz91fdk64fbg1mnd5g958dwh1il8r358hqq3f"))))
;; Record the upstream spelling, which differs from the Guix package name.
78addcb0 6105 (properties
1d216b6e 6106 `((upstream-name . "IRanges")))
78addcb0
RW
6107 (build-system r-build-system)
6108 (propagated-inputs
6109 `(("r-biocgenerics" ,r-biocgenerics)
6110 ("r-s4vectors" ,r-s4vectors)))
6111 (home-page "http://bioconductor.org/packages/IRanges")
6112 (synopsis "Infrastructure for manipulating intervals on sequences")
6113 (description
6114 "This package provides efficient low-level and highly reusable S4 classes
6115for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
6116generally, data that can be organized sequentially (formally defined as
6117@code{Vector} objects), as well as views on these @code{Vector} objects.
6118Efficient list-like classes are also provided for storing big collections of
6119instances of the basic classes. All classes in the package use consistent
6120naming and share the same rich and consistent \"Vector API\" as much as
6121possible.")
6122 (license license:artistic2.0)))
6123
bf7764b7
RW
;; Public package `r-genomeinfodb': the Bioconductor package "GenomeInfoDb"
;; at version 1.10.0, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  It propagates `r-biocgenerics', `r-iranges' and
;; `r-s4vectors'.
6124(define-public r-genomeinfodb
6125 (package
6126 (name "r-genomeinfodb")
dece310a 6127 (version "1.10.0")
bf7764b7
RW
6128 (source (origin
6129 (method url-fetch)
6130 (uri (bioconductor-uri "GenomeInfoDb" version))
6131 (sha256
6132 (base32
dece310a 6133 "0nhg4bk38gzvf3mvnbqgisbbhfv1kzjld27z1z9knnlkplkiyyyv"))))
;; Record the upstream spelling, which differs from the Guix package name.
bf7764b7 6134 (properties
1d216b6e 6135 `((upstream-name . "GenomeInfoDb")))
bf7764b7
RW
6136 (build-system r-build-system)
6137 (propagated-inputs
6138 `(("r-biocgenerics" ,r-biocgenerics)
6139 ("r-iranges" ,r-iranges)
6140 ("r-s4vectors" ,r-s4vectors)))
6141 (home-page "http://bioconductor.org/packages/GenomeInfoDb")
6142 (synopsis "Utilities for manipulating chromosome identifiers")
6143 (description
6144 "This package contains data and functions that define and allow
6145translation between different chromosome sequence naming conventions (e.g.,
6146\"chr1\" versus \"1\"), including a function that attempts to place sequence
6147names in their natural, rather than lexicographic, order.")
6148 (license license:artistic2.0)))
6149
744004a3
RJ
;; Public package `r-edger': the Bioconductor package "edgeR" at version
;; 3.16.1, fetched via `bioconductor-uri' and built with `r-build-system'.
;; It propagates `r-limma' and `r-locfit'.
6150(define-public r-edger
6151 (package
6152 (name "r-edger")
ed305423 6153 (version "3.16.1")
744004a3
RJ
6154 (source (origin
6155 (method url-fetch)
6156 (uri (bioconductor-uri "edgeR" version))
6157 (sha256
6158 (base32
ed305423 6159 "1r6hhwkqp13m022hjajzr1lnjsbai0yjhykwn0kp1f0la990a808"))))
744004a3
RJ
;; Record the upstream spelling, which differs from the Guix package name.
6160 (properties `((upstream-name . "edgeR")))
6161 (build-system r-build-system)
6162 (propagated-inputs
5e48005f
RW
6163 `(("r-limma" ,r-limma)
6164 ("r-locfit" ,r-locfit)))
744004a3
RJ
6165 (home-page "http://bioinf.wehi.edu.au/edgeR")
6166 (synopsis "EdgeR does empirical analysis of digital gene expression data")
6167 (description "This package can do differential expression analysis of
6168RNA-seq expression profiles with biological replication. It implements a range
6169of statistical methodology based on the negative binomial distributions,
6170including empirical Bayes estimation, exact tests, generalized linear models
6171and quasi-likelihood tests. It can be applied to differential signal analysis of
6172other types of genomic data that produce counts, including ChIP-seq, SAGE and
6173CAGE.")
6174 (license license:gpl2+)))
6175
b669d9c4
RJ
;; Public package `r-variantannotation': the Bioconductor package
;; "VariantAnnotation" at version 1.20.0, fetched via `bioconductor-uri' and
;; built with `r-build-system'.  `zlib' is a plain input; the R dependencies
;; listed under `propagated-inputs' are propagated.
6176(define-public r-variantannotation
6177 (package
6178 (name "r-variantannotation")
37d96f1d 6179 (version "1.20.0")
b669d9c4
RJ
6180 (source (origin
6181 (method url-fetch)
6182 (uri (bioconductor-uri "VariantAnnotation" version))
6183 (sha256
6184 (base32
37d96f1d 6185 "1lwzfgahz8ipwli73kcfqb18y6adi129hap1gnycnj3980m54i8q"))))
b669d9c4
RJ
;; Record the upstream spelling, which differs from the Guix package name.
6186 (properties
6187 `((upstream-name . "VariantAnnotation")))
6188 (inputs
6189 `(("zlib" ,zlib)))
6190 (propagated-inputs
6191 `(("r-annotationdbi" ,r-annotationdbi)
37d96f1d 6192 ("r-biobase" ,r-biobase)
b669d9c4 6193 ("r-biocgenerics" ,r-biocgenerics)
37d96f1d 6194 ("r-biostrings" ,r-biostrings)
b669d9c4
RJ
6195 ("r-bsgenome" ,r-bsgenome)
6196 ("r-dbi" ,r-dbi)
6197 ("r-genomeinfodb" ,r-genomeinfodb)
6198 ("r-genomicfeatures" ,r-genomicfeatures)
6199 ("r-genomicranges" ,r-genomicranges)
37d96f1d 6200 ("r-iranges" ,r-iranges)
b669d9c4
RJ
6201 ("r-summarizedexperiment" ,r-summarizedexperiment)
6202 ("r-rsamtools" ,r-rsamtools)
37d96f1d
RW
6203 ("r-rtracklayer" ,r-rtracklayer)
6204 ("r-s4vectors" ,r-s4vectors)
6205 ("r-xvector" ,r-xvector)
b669d9c4
RJ
6206 ("r-zlibbioc" ,r-zlibbioc)))
6207 (build-system r-build-system)
6208 (home-page "https://bioconductor.org/packages/VariantAnnotation")
6209 (synopsis "Package for annotation of genetic variants")
6210 (description "This R package can annotate variants, compute amino acid
6211coding changes and predict coding outcomes.")
6212 (license license:artistic2.0)))
6213
7d4224d7
RJ
;; Public package `r-limma': the Bioconductor package "limma" at version
;; 3.30.2, fetched via `bioconductor-uri' and built with `r-build-system'.
;; No inputs are declared.
6214(define-public r-limma
6215 (package
6216 (name "r-limma")
5b3b2d2f 6217 (version "3.30.2")
7d4224d7
RJ
6218 (source (origin
6219 (method url-fetch)
6220 (uri (bioconductor-uri "limma" version))
6221 (sha256
6222 (base32
5b3b2d2f 6223 "04jris7wk2lxksrrvrjsysznsdb2k04lfgrnp18ic49sazva0hfy"))))
7d4224d7
RJ
6224 (build-system r-build-system)
6225 (home-page "http://bioinf.wehi.edu.au/limma")
6226 (synopsis "Package for linear models for microarray and RNA-seq data")
6227 (description "This package can be used for the analysis of gene expression
6228studies, especially the use of linear models for analysing designed experiments
6229and the assessment of differential expression. The analysis methods apply to
6230different technologies, including microarrays, RNA-seq, and quantitative PCR.")
6231 (license license:gpl2+)))
6232
0e7d5560
RW
;; Public package `r-xvector': the Bioconductor package "XVector" at version
;; 0.14.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; It propagates `r-biocgenerics', `r-iranges' and `r-s4vectors'.
6233(define-public r-xvector
6234 (package
6235 (name "r-xvector")
1d048589 6236 (version "0.14.0")
0e7d5560
RW
6237 (source (origin
6238 (method url-fetch)
6239 (uri (bioconductor-uri "XVector" version))
6240 (sha256
6241 (base32
1d048589 6242 "09lbqxpqr80g0kw77mpz0p1a8cq706j33kz8194wp71il67cdzi7"))))
;; Record the upstream spelling, which differs from the Guix package name.
0e7d5560 6243 (properties
1d216b6e 6244 `((upstream-name . "XVector")))
0e7d5560
RW
6245 (build-system r-build-system)
;; After unpacking, strip the zlibbioc references from DESCRIPTION and
;; NAMESPACE; zlib itself is supplied through `inputs' below.
6246 (arguments
6247 `(#:phases
6248 (modify-phases %standard-phases
6249 (add-after 'unpack 'use-system-zlib
6250 (lambda _
6251 (substitute* "DESCRIPTION"
6252 (("zlibbioc, ") ""))
6253 (substitute* "NAMESPACE"
6254 (("import\\(zlibbioc\\)") ""))
6255 #t)))))
6256 (inputs
6257 `(("zlib" ,zlib)))
6258 (propagated-inputs
6259 `(("r-biocgenerics" ,r-biocgenerics)
6260 ("r-iranges" ,r-iranges)
6261 ("r-s4vectors" ,r-s4vectors)))
6262 (home-page "http://bioconductor.org/packages/XVector")
6263 (synopsis "Representation and manipulation of external sequences")
6264 (description
6265 "This package provides memory efficient S4 classes for storing sequences
6266\"externally\" (behind an R external pointer, or on disk).")
6267 (license license:artistic2.0)))
6268
e2cd1d0f
RW
;; Public package `r-genomicranges': the Bioconductor package "GenomicRanges"
;; at version 1.26.1, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  The R dependencies under `propagated-inputs' are
;; propagated.
6269(define-public r-genomicranges
6270 (package
6271 (name "r-genomicranges")
92a740af 6272 (version "1.26.1")
e2cd1d0f
RW
6273 (source (origin
6274 (method url-fetch)
6275 (uri (bioconductor-uri "GenomicRanges" version))
6276 (sha256
6277 (base32
92a740af 6278 "039nxccg9i2an8q2wni79x8dr9p1fcfcqvih9hg9w243pczg2g3c"))))
;; Record the upstream spelling, which differs from the Guix package name.
e2cd1d0f 6279 (properties
1d216b6e 6280 `((upstream-name . "GenomicRanges")))
e2cd1d0f
RW
6281 (build-system r-build-system)
6282 (propagated-inputs
6283 `(("r-biocgenerics" ,r-biocgenerics)
6284 ("r-genomeinfodb" ,r-genomeinfodb)
92a740af
RW
6285 ("r-iranges" ,r-iranges)
6286 ("r-s4vectors" ,r-s4vectors)
e2cd1d0f
RW
6287 ("r-xvector" ,r-xvector)))
6288 (home-page "http://bioconductor.org/packages/GenomicRanges")
6289 (synopsis "Representation and manipulation of genomic intervals")
6290 (description
6291 "This package provides tools to efficiently represent and manipulate
6292genomic annotations and alignments, which play a central role when it comes to
6293analyzing high-throughput sequencing data (a.k.a. NGS data). The
6294GenomicRanges package defines general purpose containers for storing and
6295manipulating genomic intervals and variables defined along a genome.")
6296 (license license:artistic2.0)))
6297
555e3399
RW
;; Public package `r-biobase': the Bioconductor package "Biobase" at version
;; 2.34.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; It propagates `r-biocgenerics'.
6298(define-public r-biobase
6299 (package
6300 (name "r-biobase")
464870ca 6301 (version "2.34.0")
555e3399
RW
6302 (source (origin
6303 (method url-fetch)
6304 (uri (bioconductor-uri "Biobase" version))
6305 (sha256
6306 (base32
464870ca 6307 "0js9j9wqls8f571ifl9ylllbb9a9hwf7b7drf2grwb1fl31ldazl"))))
555e3399
RW
;; Record the upstream spelling, which differs from the Guix package name.
6308 (properties
6309 `((upstream-name . "Biobase")))
6310 (build-system r-build-system)
6311 (propagated-inputs
6312 `(("r-biocgenerics" ,r-biocgenerics)))
6313 (home-page "http://bioconductor.org/packages/Biobase")
6314 (synopsis "Base functions for Bioconductor")
6315 (description
6316 "This package provides functions that are needed by many other packages
6317on Bioconductor or which replace R functions.")
6318 (license license:artistic2.0)))
6319
8b7bce74
RW
;; Public package `r-annotationdbi': the Bioconductor package "AnnotationDbi"
;; at version 1.36.0, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  The R dependencies under `propagated-inputs' are
;; propagated.
6320(define-public r-annotationdbi
6321 (package
6322 (name "r-annotationdbi")
e45dc057 6323 (version "1.36.0")
8b7bce74
RW
6324 (source (origin
6325 (method url-fetch)
6326 (uri (bioconductor-uri "AnnotationDbi" version))
6327 (sha256
6328 (base32
e45dc057 6329 "0ydrqw1k1j5p6w76bwc753cx545c055x88q87wzya93858synj6r"))))
8b7bce74
RW
;; Record the upstream spelling, which differs from the Guix package name.
6330 (properties
6331 `((upstream-name . "AnnotationDbi")))
6332 (build-system r-build-system)
6333 (propagated-inputs
6334 `(("r-biobase" ,r-biobase)
6335 ("r-biocgenerics" ,r-biocgenerics)
6336 ("r-dbi" ,r-dbi)
6337 ("r-iranges" ,r-iranges)
6338 ("r-rsqlite" ,r-rsqlite)
6339 ("r-s4vectors" ,r-s4vectors)))
6340 (home-page "http://bioconductor.org/packages/AnnotationDbi")
6341 (synopsis "Annotation database interface")
6342 (description
6343 "This package provides user interface and database connection code for
6344annotation data packages using SQLite data storage.")
6345 (license license:artistic2.0)))
6346
c465fa72
RW
;; Public package `r-biomart': the Bioconductor package "biomaRt" at version
;; 2.30.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; It propagates `r-annotationdbi', `r-rcurl' and `r-xml'.
6347(define-public r-biomart
6348 (package
6349 (name "r-biomart")
94053331 6350 (version "2.30.0")
c465fa72
RW
6351 (source (origin
6352 (method url-fetch)
6353 (uri (bioconductor-uri "biomaRt" version))
6354 (sha256
6355 (base32
94053331 6356 "1x0flcghq71784q2l02j0g4f9jkmyb14f6i307n6c59d6ji7h7x6"))))
c465fa72
RW
;; Record the upstream spelling, which differs from the Guix package name.
6357 (properties
6358 `((upstream-name . "biomaRt")))
6359 (build-system r-build-system)
6360 (propagated-inputs
6361 `(("r-annotationdbi" ,r-annotationdbi)
6362 ("r-rcurl" ,r-rcurl)
6363 ("r-xml" ,r-xml)))
6364 (home-page "http://bioconductor.org/packages/biomaRt")
6365 (synopsis "Interface to BioMart databases")
;; In the description, Texinfo's @url takes the URL as its first argument and
;; the displayed text as its second.
6366 (description
6367 "biomaRt provides an interface to a growing collection of databases
6368implementing the @url{http://www.biomart.org, BioMart software suite}. The
6369package enables retrieval of large amounts of data in a uniform way without
6370the need to know the underlying database schemas or write complex SQL queries.
6371Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
6372Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
6373users direct access to a diverse set of data and enable a wide range of
6374powerful online queries from gene annotation to database mining.")
6375 (license license:artistic2.0)))
6376
e91d362e
RW
;; Public package `r-biocparallel': the Bioconductor package "BiocParallel"
;; at version 1.8.1, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  It propagates `r-futile-logger' and `r-snow'.
6377(define-public r-biocparallel
6378 (package
6379 (name "r-biocparallel")
c15ae9b0 6380 (version "1.8.1")
e91d362e
RW
6381 (source (origin
6382 (method url-fetch)
6383 (uri (bioconductor-uri "BiocParallel" version))
6384 (sha256
6385 (base32
c15ae9b0 6386 "123i928rwi4h4sy4fpysv6pinw5nns0sm3myxi2ghqhm34ws8gyl"))))
e91d362e
RW
;; Record the upstream spelling, which differs from the Guix package name.
6387 (properties
6388 `((upstream-name . "BiocParallel")))
6389 (build-system r-build-system)
6390 (propagated-inputs
6391 `(("r-futile-logger" ,r-futile-logger)
6392 ("r-snow" ,r-snow)))
6393 (home-page "http://bioconductor.org/packages/BiocParallel")
6394 (synopsis "Bioconductor facilities for parallel evaluation")
6395 (description
6396 "This package provides modified versions and novel implementation of
6397functions for parallel evaluation, tailored to use with Bioconductor
6398objects.")
;; The license field is a list of two licenses rather than a single one.
6399 (license (list license:gpl2+ license:gpl3+))))
6400
bf159353
RW
;; Public package `r-biostrings': the Bioconductor package "Biostrings" at
;; version 2.42.0, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  It propagates `r-biocgenerics', `r-iranges',
;; `r-s4vectors' and `r-xvector'.
6401(define-public r-biostrings
6402 (package
6403 (name "r-biostrings")
47d39b6e 6404 (version "2.42.0")
bf159353
RW
6405 (source (origin
6406 (method url-fetch)
6407 (uri (bioconductor-uri "Biostrings" version))
6408 (sha256
6409 (base32
47d39b6e 6410 "08z8lkz3axa94wkf144a931ry6vf6cc25avi1ywr84ln2k5czz9f"))))
bf159353
RW
;; Record the upstream spelling, which differs from the Guix package name.
6411 (properties
6412 `((upstream-name . "Biostrings")))
6413 (build-system r-build-system)
6414 (propagated-inputs
6415 `(("r-biocgenerics" ,r-biocgenerics)
6416 ("r-iranges" ,r-iranges)
6417 ("r-s4vectors" ,r-s4vectors)
6418 ("r-xvector" ,r-xvector)))
6419 (home-page "http://bioconductor.org/packages/Biostrings")
6420 (synopsis "String objects and algorithms for biological sequences")
6421 (description
6422 "This package provides memory efficient string containers, string
6423matching algorithms, and other utilities, for fast manipulation of large
6424biological sequences or sets of sequences.")
6425 (license license:artistic2.0)))
6426
f8d74f70
RW
;; Public package `r-rsamtools': the Bioconductor package "Rsamtools" at
;; version 1.26.1, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  `zlib' is a plain input; the R dependencies under
;; `propagated-inputs' are propagated.
6427(define-public r-rsamtools
6428 (package
6429 (name "r-rsamtools")
c0ccef41 6430 (version "1.26.1")
f8d74f70
RW
6431 (source (origin
6432 (method url-fetch)
6433 (uri (bioconductor-uri "Rsamtools" version))
6434 (sha256
6435 (base32
c0ccef41 6436 "0pf4f6brf4bl5zgjrah0f38qslazrs49ayqgyh0xfqgrh63yx4ck"))))
f8d74f70
RW
;; Record the upstream spelling, which differs from the Guix package name.
6437 (properties
6438 `((upstream-name . "Rsamtools")))
6439 (build-system r-build-system)
;; After unpacking, strip the zlibbioc references from DESCRIPTION and
;; NAMESPACE; zlib itself is supplied through `inputs' below.
6440 (arguments
6441 `(#:phases
6442 (modify-phases %standard-phases
6443 (add-after 'unpack 'use-system-zlib
6444 (lambda _
6445 (substitute* "DESCRIPTION"
6446 (("zlibbioc, ") ""))
6447 (substitute* "NAMESPACE"
6448 (("import\\(zlibbioc\\)") ""))
6449 #t)))))
6450 (inputs
6451 `(("zlib" ,zlib)))
6452 (propagated-inputs
6453 `(("r-biocgenerics" ,r-biocgenerics)
6454 ("r-biocparallel" ,r-biocparallel)
6455 ("r-biostrings" ,r-biostrings)
6456 ("r-bitops" ,r-bitops)
6457 ("r-genomeinfodb" ,r-genomeinfodb)
6458 ("r-genomicranges" ,r-genomicranges)
6459 ("r-iranges" ,r-iranges)
6460 ("r-s4vectors" ,r-s4vectors)
6461 ("r-xvector" ,r-xvector)))
6462 (home-page "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
6463 (synopsis "Interface to samtools, bcftools, and tabix")
6464 (description
6465 "This package provides an interface to the 'samtools', 'bcftools', and
6466'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
6467binary variant call (BCF) and compressed indexed tab-delimited (tabix)
6468files.")
6469 (license license:expat)))
6470
6e76dda2
RW
;; Public package `r-summarizedexperiment': the Bioconductor package
;; "SummarizedExperiment" at version 1.4.0, fetched via `bioconductor-uri'
;; and built with `r-build-system'.  The R dependencies under
;; `propagated-inputs' are propagated.
6471(define-public r-summarizedexperiment
6472 (package
6473 (name "r-summarizedexperiment")
08da08d2 6474 (version "1.4.0")
6e76dda2
RW
6475 (source (origin
6476 (method url-fetch)
6477 (uri (bioconductor-uri "SummarizedExperiment" version))
6478 (sha256
6479 (base32
08da08d2 6480 "1kbj8sg2ik9f8d6g95wz0py62jldg01qy5rsdpg1cxw95nf7dzi3"))))
6e76dda2
RW
;; Record the upstream spelling, which differs from the Guix package name.
6481 (properties
6482 `((upstream-name . "SummarizedExperiment")))
6483 (build-system r-build-system)
6484 (propagated-inputs
6485 `(("r-biobase" ,r-biobase)
6486 ("r-biocgenerics" ,r-biocgenerics)
6487 ("r-genomeinfodb" ,r-genomeinfodb)
6488 ("r-genomicranges" ,r-genomicranges)
6489 ("r-iranges" ,r-iranges)
6490 ("r-s4vectors" ,r-s4vectors)))
6491 (home-page "http://bioconductor.org/packages/SummarizedExperiment")
6492 (synopsis "Container for representing genomic ranges by sample")
6493 (description
6494 "The SummarizedExperiment container contains one or more assays, each
6495represented by a matrix-like object of numeric or other mode. The rows
6496typically represent genomic ranges of interest and the columns represent
6497samples.")
6498 (license license:artistic2.0)))
6499
d8a828af
RW
;; Public package `r-genomicalignments': the Bioconductor package
;; "GenomicAlignments" at version 1.10.0, fetched via `bioconductor-uri' and
;; built with `r-build-system'.  The R dependencies under `propagated-inputs'
;; are propagated.
6500(define-public r-genomicalignments
6501 (package
6502 (name "r-genomicalignments")
4ab32f0c 6503 (version "1.10.0")
d8a828af
RW
6504 (source (origin
6505 (method url-fetch)
6506 (uri (bioconductor-uri "GenomicAlignments" version))
6507 (sha256
6508 (base32
4ab32f0c 6509 "11vb0a0zd36i4yhg4mfijv787v0nihn6pkjj6q7rfy19gwy61xlc"))))
d8a828af
RW
;; Record the upstream spelling, which differs from the Guix package name.
6510 (properties
6511 `((upstream-name . "GenomicAlignments")))
6512 (build-system r-build-system)
6513 (propagated-inputs
6514 `(("r-biocgenerics" ,r-biocgenerics)
6515 ("r-biocparallel" ,r-biocparallel)
6516 ("r-biostrings" ,r-biostrings)
6517 ("r-genomeinfodb" ,r-genomeinfodb)
6518 ("r-genomicranges" ,r-genomicranges)
6519 ("r-iranges" ,r-iranges)
6520 ("r-rsamtools" ,r-rsamtools)
6521 ("r-s4vectors" ,r-s4vectors)
6522 ("r-summarizedexperiment" ,r-summarizedexperiment)))
6523 (home-page "http://bioconductor.org/packages/GenomicAlignments")
6524 (synopsis "Representation and manipulation of short genomic alignments")
6525 (description
6526 "This package provides efficient containers for storing and manipulating
6527short genomic alignments (typically obtained by aligning short reads to a
6528reference genome). This includes read counting, computing the coverage,
6529junction detection, and working with the nucleotide content of the
6530alignments.")
6531 (license license:artistic2.0)))
6532
317755ff
RW
;; Public package `r-rtracklayer': the Bioconductor package "rtracklayer" at
;; version 1.34.1, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  `zlib' is a plain input; the R dependencies under
;; `propagated-inputs' are propagated.
6533(define-public r-rtracklayer
6534 (package
6535 (name "r-rtracklayer")
16a8bd31 6536 (version "1.34.1")
317755ff
RW
6537 (source (origin
6538 (method url-fetch)
6539 (uri (bioconductor-uri "rtracklayer" version))
6540 (sha256
6541 (base32
16a8bd31 6542 "0x59k2fd0iaqi93gy6bm58p2j2z90z1b7a6w5b4c098y98n096rc"))))
317755ff
RW
6543 (build-system r-build-system)
;; After unpacking, strip the zlibbioc references from DESCRIPTION and
;; NAMESPACE; zlib itself is supplied through `inputs' below.
6544 (arguments
6545 `(#:phases
6546 (modify-phases %standard-phases
6547 (add-after 'unpack 'use-system-zlib
6548 (lambda _
6549 (substitute* "DESCRIPTION"
6550 (("zlibbioc, ") ""))
6551 (substitute* "NAMESPACE"
6552 (("import\\(zlibbioc\\)") ""))
6553 #t)))))
6554 (inputs
6555 `(("zlib" ,zlib)))
6556 (propagated-inputs
6557 `(("r-biocgenerics" ,r-biocgenerics)
6558 ("r-biostrings" ,r-biostrings)
6559 ("r-genomeinfodb" ,r-genomeinfodb)
6560 ("r-genomicalignments" ,r-genomicalignments)
6561 ("r-genomicranges" ,r-genomicranges)
6562 ("r-iranges" ,r-iranges)
6563 ("r-rcurl" ,r-rcurl)
6564 ("r-rsamtools" ,r-rsamtools)
6565 ("r-s4vectors" ,r-s4vectors)
6566 ("r-xml" ,r-xml)
6567 ("r-xvector" ,r-xvector)))
6568 (home-page "http://bioconductor.org/packages/rtracklayer")
6569 (synopsis "R interface to genome browsers and their annotation tracks")
6570 (description
6571 "rtracklayer is an extensible framework for interacting with multiple
6572genome browsers (currently UCSC built-in) and manipulating annotation tracks
6573in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
6574built-in). The user may export/import tracks to/from the supported browsers,
6575as well as query and modify the browser state, such as the current viewport.")
6576 (license license:artistic2.0)))
6577
2fd7c049
RW
;; Public package `r-genomicfeatures': the Bioconductor package
;; "GenomicFeatures" at version 1.26.0, fetched via `bioconductor-uri' and
;; built with `r-build-system'.  The R dependencies under `propagated-inputs'
;; are propagated.
6578(define-public r-genomicfeatures
6579 (package
6580 (name "r-genomicfeatures")
b17aac08 6581 (version "1.26.0")
2fd7c049
RW
6582 (source (origin
6583 (method url-fetch)
6584 (uri (bioconductor-uri "GenomicFeatures" version))
6585 (sha256
6586 (base32
b17aac08 6587 "0z8spi2knwzwi10c38vr7xlvi3ah9faj7m1lka880mmxkl9cai4k"))))
2fd7c049
RW
;; Record the upstream spelling, which differs from the Guix package name.
6588 (properties
6589 `((upstream-name . "GenomicFeatures")))
6590 (build-system r-build-system)
6591 (propagated-inputs
6592 `(("r-annotationdbi" ,r-annotationdbi)
6593 ("r-biobase" ,r-biobase)
6594 ("r-biocgenerics" ,r-biocgenerics)
6595 ("r-biomart" ,r-biomart)
6596 ("r-biostrings" ,r-biostrings)
6597 ("r-dbi" ,r-dbi)
6598 ("r-genomeinfodb" ,r-genomeinfodb)
6599 ("r-genomicranges" ,r-genomicranges)
6600 ("r-iranges" ,r-iranges)
6601 ("r-rcurl" ,r-rcurl)
6602 ("r-rsqlite" ,r-rsqlite)
6603 ("r-rtracklayer" ,r-rtracklayer)
6604 ("r-s4vectors" ,r-s4vectors)
6605 ("r-xvector" ,r-xvector)))
6606 (home-page "http://bioconductor.org/packages/GenomicFeatures")
6607 (synopsis "Tools for working with transcript centric annotations")
6608 (description
6609 "This package provides a set of tools and methods for making and
6610manipulating transcript centric annotations. With these tools the user can
6611easily download the genomic locations of the transcripts, exons and cds of a
6612given organism, from either the UCSC Genome Browser or a BioMart
6613database (more sources will be supported in the future). This information is
6614then stored in a local database that keeps track of the relationship between
6615transcripts, exons, cds and genes. Flexible methods are provided for
6616extracting the desired features in a convenient format.")
6617 (license license:artistic2.0)))
6618
fb25d880
RW
;; Public package `r-go-db': the Bioconductor annotation package "GO.db" at
;; version 3.4.0, built with `r-build-system'.  It propagates
;; `r-annotationdbi'.
6619(define-public r-go-db
6620 (package
6621 (name "r-go-db")
d1b1587c 6622 (version "3.4.0")
fb25d880
RW
6623 (source (origin
6624 (method url-fetch)
f82c8c3c
PP
;; The URI is assembled by hand here (data/annotation path) instead of being
;; computed with `bioconductor-uri' as in the neighbouring packages.
6625 (uri (string-append "http://www.bioconductor.org/packages/"
6626 "release/data/annotation/src/contrib/GO.db_"
6627 version ".tar.gz"))
fb25d880
RW
6628 (sha256
6629 (base32
d1b1587c 6630 "02cj8kqi5w39jwcs8gp1dgj08sah262ppxnkz4h3qd0w191y8yyl"))))
fb25d880
RW
;; Record the upstream spelling, which differs from the Guix package name.
6631 (properties
6632 `((upstream-name . "GO.db")))
6633 (build-system r-build-system)
3141b83d
RW
6634 (propagated-inputs
6635 `(("r-annotationdbi" ,r-annotationdbi)))
fb25d880
RW
6636 (home-page "http://bioconductor.org/packages/GO.db")
6637 (synopsis "Annotation maps describing the entire Gene Ontology")
6638 (description
6639 "The purpose of this GO.db annotation package is to provide detailed
6640information about the latest version of the Gene Ontologies.")
6641 (license license:artistic2.0)))
6642
d1dbde6a
RW
;; Public package `r-graph': the Bioconductor package "graph" at version
;; 1.52.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; It propagates `r-biocgenerics'.
6643(define-public r-graph
6644 (package
6645 (name "r-graph")
3587b464 6646 (version "1.52.0")
d1dbde6a
RW
6647 (source (origin
6648 (method url-fetch)
6649 (uri (bioconductor-uri "graph" version))
6650 (sha256
6651 (base32
3587b464 6652 "0g3dk5vsdp489fmyg8mifczmzgqrjlakkkr8i96dj15gghp3l135"))))
d1dbde6a
RW
6653 (build-system r-build-system)
6654 (propagated-inputs
6655 `(("r-biocgenerics" ,r-biocgenerics)))
6656 (home-page "http://bioconductor.org/packages/graph")
6657 (synopsis "Handle graph data structures in R")
6658 (description
6659 "This package implements some simple graph handling capabilities for R.")
6660 (license license:artistic2.0)))
6661
d547ce5e
RW
;; Public package `r-topgo': the Bioconductor package "topGO" at version
;; 2.26.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; The R dependencies under `propagated-inputs' are propagated.
6662(define-public r-topgo
6663 (package
6664 (name "r-topgo")
30ec4de7 6665 (version "2.26.0")
d547ce5e
RW
6666 (source (origin
6667 (method url-fetch)
6668 (uri (bioconductor-uri "topGO" version))
6669 (sha256
6670 (base32
30ec4de7 6671 "0j6sgvam4lk9348ag6pypcbkv93x4fk0di8ivhr23mz2s2yqzwrx"))))
d547ce5e
RW
;; Record the upstream spelling, which differs from the Guix package name.
6672 (properties
6673 `((upstream-name . "topGO")))
6674 (build-system r-build-system)
6675 (propagated-inputs
6676 `(("r-annotationdbi" ,r-annotationdbi)
30ec4de7 6677 ("r-dbi" ,r-dbi)
d547ce5e
RW
6678 ("r-biobase" ,r-biobase)
6679 ("r-biocgenerics" ,r-biocgenerics)
6680 ("r-go-db" ,r-go-db)
e48e88c2 6681 ("r-matrixstats" ,r-matrixstats)
6d415db2 6682 ("r-graph" ,r-graph)
d547ce5e
RW
6683 ("r-sparsem" ,r-sparsem)))
6684 (home-page "http://bioconductor.org/packages/topGO")
6685 (synopsis "Enrichment analysis for gene ontology")
6686 (description
6687 "The topGO package provides tools for testing @dfn{gene ontology} (GO)
6688terms while accounting for the topology of the GO graph. Different test
6689statistics and different methods for eliminating local similarities and
6690dependencies between GO terms can be implemented and applied.")
6691 ;; Any version of the LGPL applies.
6692 (license license:lgpl2.1+)))
6693
c63cef66
RW
;; Public package `r-bsgenome': the Bioconductor package "BSgenome" at
;; version 1.42.0, fetched via `bioconductor-uri' and built with
;; `r-build-system'.  The R dependencies under `propagated-inputs' are
;; propagated.
6694(define-public r-bsgenome
6695 (package
6696 (name "r-bsgenome")
3d74c044 6697 (version "1.42.0")
c63cef66
RW
6698 (source (origin
6699 (method url-fetch)
6700 (uri (bioconductor-uri "BSgenome" version))
6701 (sha256
6702 (base32
3d74c044 6703 "0hxwc02h5mzhkrk60d1jmlsfjf0ai9jxdc0128kj1sg4r2k1q94y"))))
c63cef66
RW
;; Record the upstream spelling, which differs from the Guix package name.
6704 (properties
6705 `((upstream-name . "BSgenome")))
6706 (build-system r-build-system)
6707 (propagated-inputs
6708 `(("r-biocgenerics" ,r-biocgenerics)
6709 ("r-biostrings" ,r-biostrings)
6710 ("r-genomeinfodb" ,r-genomeinfodb)
6711 ("r-genomicranges" ,r-genomicranges)
6712 ("r-iranges" ,r-iranges)
6713 ("r-rsamtools" ,r-rsamtools)
6714 ("r-rtracklayer" ,r-rtracklayer)
6715 ("r-s4vectors" ,r-s4vectors)
6716 ("r-xvector" ,r-xvector)))
6717 (home-page "http://bioconductor.org/packages/BSgenome")
6718 (synopsis "Infrastructure for Biostrings-based genome data packages")
6719 (description
6720 "This package provides infrastructure shared by all Biostrings-based
6721genome data packages and support for efficient SNP representation.")
6722 (license license:artistic2.0)))
6723
c43a011d
RW
;; Public package `r-impute': the Bioconductor package "impute" at version
;; 1.48.0, fetched via `bioconductor-uri' and built with `r-build-system'.
;; `gfortran' is the only declared input.
6724(define-public r-impute
6725 (package
6726 (name "r-impute")
acf6f7e0 6727 (version "1.48.0")
c43a011d
RW
6728 (source (origin
6729 (method url-fetch)
6730 (uri (bioconductor-uri "impute" version))
6731 (sha256
6732 (base32
acf6f7e0 6733 "1164zvnikbjd0ybdn9xwn520rlmdjd824vmhnl83zgv3v9lzp9bm"))))
c43a011d
RW
;; NOTE(review): presumably the package contains Fortran sources that need
;; this compiler -- confirm against the upstream tarball.
6734 (inputs
6735 `(("gfortran" ,gfortran)))
6736 (build-system r-build-system)
6737 (home-page "http://bioconductor.org/packages/impute")
6738 (synopsis "Imputation for microarray data")
6739 (description
6740 "This package provides a function to impute missing gene expression
6741microarray data, using nearest neighbor averaging.")
6742 (license license:gpl2+)))
6743
03ea5a35
RW
;; Bioconductor "seqPattern": plots of oligonucleotide patterns and motif
;; occurrences across sets of sequences.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32
         "0lsa5pz36xapi3yiv78k3z286a5md5sm5g21pgfyg8zmhmkxr7y8"))))
    (build-system r-build-system)
    ;; Upstream uses a mixed-case name.
    (properties '((upstream-name . "seqPattern")))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (home-page "http://bioconductor.org/packages/seqPattern")
    (license license:gpl3+)))
6770
cb933df6
RW
;; Bioconductor "genomation": summary, annotation and visualization of
;; genomic intervals.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32
         "1m4mz7wihj8yqivwkzw68div8ybk4rjsai3ffki7xp7sh21ax03y"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    ;; R packages that must be available wherever genomation is installed.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (license license:artistic2.0)))
6819
64efa307
RW
;; Experimental data sets that accompany the genomation package.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/experiment/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "release/data/experiment/src/contrib/"
                                  "genomationData_" version ".tar.gz"))
              (sha256
               (base32
                "16dqwb7wx1igx77zdbcskx5m1hs4g4gp2hl56zzm70hcagnlkz8y"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
6847
486da491
RW
;; Bioconductor annotation data "org.Hs.eg.db" (human).
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Hs.eg.db_" version ".tar.gz"))
       (sha256
        (base32
         "19mg64pw8zcvb9yxzzyf7caz1kvdrkfsj1hd84bzq7crrh8kc4y6"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Hs.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Human")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
    (home-page "http://www.bioconductor.org/packages/org.Hs.eg.db/")
    (license license:artistic2.0)))
6873
fefedf98
RW
;; Bioconductor annotation data "org.Ce.eg.db" (Caenorhabditis elegans).
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Ce.eg.db_" version ".tar.gz"))
       (sha256
        (base32
         "12llfzrrc09kj2wzbisdspv38qzkzgpsbn8kv7qkwg746k3pq436"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Ce.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (home-page "http://www.bioconductor.org/packages/org.Ce.eg.db/")
    (license license:artistic2.0)))
6899
16c53a1e
RW
;; Bioconductor annotation data "org.Dm.eg.db" (Drosophila melanogaster).
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Dm.eg.db_" version ".tar.gz"))
       (sha256
        (base32
         "1vzbphbrh1cf7xi5cksia9xy9a9l42js2z2qsajvjxvddiphrb7j"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Dm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (home-page "http://www.bioconductor.org/packages/org.Dm.eg.db/")
    (license license:artistic2.0)))
6925
e761beb9
RW
;; Bioconductor annotation data "org.Mm.eg.db" (Mus musculus).
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Mm.eg.db_" version ".tar.gz"))
       (sha256
        (base32
         "1lykjqjaf01fmgg3cvfcvwd5xjq6zc5vbxnm5r4l32fzvl89q50c"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Mm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (home-page "http://www.bioconductor.org/packages/org.Mm.eg.db/")
    (license license:artistic2.0)))
6951
936e7d67
RW
;; Bioconductor "seqLogo": sequence logo plots for DNA motifs.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "18bajdl75h3039559d81rgllqqvnq8ygsfxfx081xphxs0v6xggy"))))
    (build-system r-build-system)
    ;; Upstream uses a mixed-case name.
    (properties '((upstream-name . "seqLogo")))
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (home-page "http://bioconductor.org/packages/seqLogo")
    (license license:lgpl2.0+)))
6972
c90a4baf
RW
;; Genome data package: Homo sapiens, UCSC hg19.
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.UCSC.hg19_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    ;; The package is essentially one very large data file, so offering
    ;; substitutes for it is pointless.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (license license:artistic2.0)))
7003
a3e90287
RW
;; Genome data package: Mus musculus, UCSC mm9.
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm9_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    ;; The package is essentially one very large data file, so offering
    ;; substitutes for it is pointless.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (license license:artistic2.0)))
7034
4714d521
RW
;; Genome data package: Mus musculus, UCSC mm10.
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm10_"
                           version ".tar.gz"))
       (sha256
        (base32
         "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    ;; The package is essentially one very large data file, so offering
    ;; substitutes for it is pointless.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (license license:artistic2.0)))
7066
943bd627
RW
;; Genome data package: Caenorhabditis elegans, UCSC ce6.
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce6_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    ;; The package is essentially one very large data file, so offering
    ;; substitutes for it is pointless.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (license license:artistic2.0)))
7098
fc47c7d6
RW
;; Genome data package: Caenorhabditis elegans, UCSC ce10.
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce10_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    ;; The package is essentially one very large data file, so offering
    ;; substitutes for it is pointless.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (license license:artistic2.0)))
7130
6dc60998
RW
;; Genome data package: Drosophila melanogaster, UCSC dm3.
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri points at "bioc/", whereas this tarball is kept
       ;; under "data/annotation/", so the URL is spelled out by hand.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm3_"
                           version ".tar.gz"))
       (sha256
        (base32
         "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    ;; The package is essentially one very large data file, so offering
    ;; substitutes for it is pointless.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (home-page
     "http://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (license license:artistic2.0)))
7162
ae2462f7
RW
;; Bioconductor "motifRG": discriminative motif discovery using regression
;; methods.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32
         "1pa97aj6c5f3gx4bgriw110764dj3m9h104ddi8rv2bpy41yd98d"))))
    (properties `((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ;; The label follows the dash-separated package name, like every
       ;; other input label in this file.
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
7189
a5002ae7
AE
;; CRAN package "qtl" (R/qtl), fetched from the CRAN mirror.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.39-5")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://cran/src/contrib/qtl_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1grwgvyv7x0dgay1858bg7qf4wk47gpnq7qkqpcda9cn0h970d6f"))))
    (build-system r-build-system)
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (home-page "http://rqtl.org/")
    (license license:gpl3)))
d1e32822 7214
9e3ba31c
RJ
;; Bioconductor "zlibbioc": zlib-1.2.5 sources packaged for use by other R
;; packages.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32
         "0hbk90q5hl0fycfvy5nxxa4hxgglag9lzp7i0fg849bqygg5nbyq"))))
    (build-system r-build-system)
    (properties '((upstream-name . "zlibbioc")))
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (license license:artistic2.0)))
7233
52765a63
RW
;; Bioconductor "Rhtslib": the HTSlib C library wrapped as an R package.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "1vk3ng61dhi3pbia1lp3gl3mlr3i1vb2lkq83qb53i9dzz128wh9"))))
    (build-system r-build-system)
    ;; Upstream capitalizes the name.
    (properties '((upstream-name . "Rhtslib")))
    (native-inputs
     `(("autoconf" ,autoconf)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (home-page "https://github.com/nhayden/Rhtslib")
    (license license:lgpl2.0+)))
7260
fe02c4c9
RW
;; Bioconductor "bamsignals": read-count signals from indexed BAM files.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "1k42gvk5mgq4la1fp0in3an2zfdz69h6522jsqhmk0f6i75kg4mb"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (home-page "http://bioconductor.org/packages/bamsignals")
    (license license:gpl2+)))
7290
89984be4
RW
;; R library behind the RNA-centric annotation system (RCAS), built from a
;; tagged GitHub archive.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/RCAS/archive/v"
                           version ".tar.gz"))
       ;; Give the downloaded archive a name built from the package name
       ;; and version.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1h7di822ihgkhmmmlfbfz3c2dkjyjxl307i6mx8w0cwjqbna1kp6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-biomart" ,r-biomart)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-topgo" ,r-topgo)
       ("r-dt" ,r-dt)
       ("r-plotly" ,r-plotly)
       ("r-motifrg" ,r-motifrg)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)))
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("r-testthat" ,r-testthat)
       ;; While the vignettes are built, knitr verifies that
       ;; "pandoc-citeproc" can be found in PATH.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:expat)))
7336
50937297
RW
;; Web front end for RCAS.  After installation the "rcas-web" executable is
;; wrapped so that it runs with fixed Guile and R search paths.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0d3my0g8i7js59n184zzzjdki7hgmhpi4rhfvk7i6jsw01ba04qq"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r" ,r)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-next)
       ("guile-json" ,guile2.2-json)
       ("guile-redis" ,guile2.2-redis)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix     (assoc-ref outputs "out"))
                    (json-dir   (assoc-ref inputs "guile-json"))
                    (redis-dir  (assoc-ref inputs "guile-redis"))
                    ;; Colon-separated Guile 2.2 site directories of the
                    ;; two Guile libraries.
                    (guile-path (string-append
                                 json-dir "/share/guile/site/2.2:"
                                 redis-dir "/share/guile/site/2.2")))
               ;; "=" sets each variable to exactly this value; R_LIBS_SITE
               ;; is taken from the build environment.
               (wrap-program (string-append prefix "/bin/rcas-web")
                 `("GUILE_LOAD_PATH" ":" = (,guile-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,guile-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (license license:agpl3+)))
7380
7500e42b
RJ
;; Bioconductor "MutationalPatterns": characterization and visualization of
;; mutational patterns in SNV data.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.0.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "1a3c2bm0xx0q4gf98jiw74msmdf2fr8rbsdysd5ww9kqlzmsbr17"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (home-page "http://bioconductor.org/packages/MutationalPatterns/")
    (license license:expat)))
7413
66e40e00
RW
;; EMBOSS molecular biology suite.  The build patches the autotools input
;; files and then regenerates the build system before configuring.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32
         "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (arguments
     `(#:configure-flags
       ;; Point configure at the libharu input.
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; CHECK_PNGDRIVER probes for libgd, libpng and zlib under
             ;; one common prefix.  Replace the macro call with the
             ;; settings it would produce.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'unpack 'disable-update-check
           (lambda _
             ;; The build has no Internet connection, so an update lookup
             ;; cannot work; remove the embossupdate invocation.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate the build system from the patched inputs.
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (home-page "http://emboss.sourceforge.net")
    (license license:gpl2+)))
7476
1f1b20b8
RW
;; BITS (Binary Interval Search), built from a pinned git commit.
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; No release archive has been published so far, but the version is
      ;; 2.13.0.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (arguments
       `(#:tests? #f                    ; the sources include no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Blank every Makefile line that mentions a "_cuda" target,
               ;; and remove the line continuation that follows
               ;; "bits_test_intersections".
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ target) target))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy the "bin" directory into the output.
               (let ((destination (string-append (assoc-ref outputs "out")
                                                 "/bin")))
                 (copy-recursively "bin" destination))
               #t)))))
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (home-page "https://github.com/arq5x/bits")
      (license license:gpl2))))
7522
;; Piranha peak caller, built from a pinned git commit together with a
;; separately fetched checkout of smithlab_cpp.
(define-public piranha
  ;; There is no release tarball for the latest version.  The latest commit is
  ;; older than one year at the time of this writing.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/piranha.git")
                      (commit commit)))
                ;; Name the checkout after the package and version, as is
                ;; done for the other git checkouts in this file.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Copy every file of the smithlab-cpp input into the source
               ;; tree's "src/smithlab_cpp" directory.
               (for-each (lambda (file)
                           (install-file file "./src/smithlab_cpp/"))
                         (find-files (assoc-ref inputs "smithlab-cpp")))
               #t))
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy everything found under the build tree's "bin"
               ;; directory into the output's "bin" directory.
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (mkdir-p bin)
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files "bin" ".*")))
               #t)))
         #:configure-flags
         ;; Tell configure where the bamtools headers and libraries are.
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools") "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Source checkout that the 'copy-smithlab-cpp phase copies into
         ;; the piranha source tree.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
e62ffce5 7589
d1e32822
RW
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Use the shared PyPI helper rather than a hand-built URL.
              ;; The PyPI project name is "PePr", not the lower-case package
              ;; name.
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         ;; When setuptools is used a ".egg" archive is generated and
         ;; installed.  This makes it hard to actually run PePr.  This issue
         ;; has been reported upstream:
         ;; https://github.com/shawnzhangyx/PePr/issues/9
         ;; Work around it by making setup.py import "setup" from distutils.
         (add-after 'unpack 'disable-egg-generation
           (lambda _
             (substitute* "setup.py"
               (("from setuptools import setup")
                "from distutils.core import setup"))
             #t)))))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    ;; The former Google Code project page is retired; point at the GitHub
    ;; repository that also hosts the issue referenced above.
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and looks for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
6b49a37e
RJ
7630
(define-public filevercmp
  ;; Built from a pinned commit; the version string embeds the first seven
  ;; characters of the commit hash.
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                ;; The archive is named after the commit hash; store it under
                ;; a name made of the package name and version instead.
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Install the "filevercmp" executable into the "bin" directory
           ;; of the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Signal success explicitly, like the other custom phases
                 ;; in this file, instead of relying on the unspecified
                 ;; value of 'install-file'.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
5fb5dffb
RW
7658
(define-public multiqc
  ;; MultiQC, fetched from PyPI and built with the Python build system.
  (package
    (name "multiqc")
    (version "0.6")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32 "0avw11h63ldpxy5pizc3wl1wa01ha7q10wb240nggsjz3jaqvyiy"))))
    (build-system python-build-system)
    ;; Python libraries declared as propagated inputs of the package.
    (propagated-inputs
     `(("python-jinja2"     ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml"     ,python-pyyaml)
       ("python-click"      ,python-click)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy"      ,python-numpy)))
    (home-page "http://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3)))