gnu: r-impute: Update to 1.50.0.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
CommitLineData
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2014, 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2015, 2016, 2017 Ben Woodcroft <donttrustben@gmail.com>
;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
;;; Copyright © 2016 Raoul Bonnal <ilpuccio.febo@gmail.com>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
25
26(define-module (gnu packages bioinformatics)
27 #:use-module ((guix licenses) #:prefix license:)
28 #:use-module (guix packages)
8e913213 29 #:use-module (guix utils)
4e10a221 30 #:use-module (guix download)
2c16316e 31 #:use-module (guix git-download)
ec946638 32 #:use-module (guix hg-download)
10b4a969 33 #:use-module (guix build-system ant)
4e10a221 34 #:use-module (guix build-system gnu)
d7678942 35 #:use-module (guix build-system cmake)
365c8153 36 #:use-module (guix build-system perl)
8622a072 37 #:use-module (guix build-system python)
a5002ae7 38 #:use-module (guix build-system r)
9c38b540 39 #:use-module (guix build-system ruby)
d3517eda 40 #:use-module (guix build-system trivial)
4e10a221 41 #:use-module (gnu packages)
a2950fa4 42 #:use-module (gnu packages autotools)
684bf7c7 43 #:use-module (gnu packages algebra)
d3517eda 44 #:use-module (gnu packages base)
318c0aee 45 #:use-module (gnu packages bash)
a0a71439 46 #:use-module (gnu packages bison)
e4e5a4d8 47 #:use-module (gnu packages boost)
4e10a221 48 #:use-module (gnu packages compression)
82c370de 49 #:use-module (gnu packages cpio)
1baee943 50 #:use-module (gnu packages curl)
99828fa7 51 #:use-module (gnu packages documentation)
d29150b5 52 #:use-module (gnu packages datastructures)
75dd2424 53 #:use-module (gnu packages file)
99268755 54 #:use-module (gnu packages flex)
02f35bb5 55 #:use-module (gnu packages gawk)
2409f37f 56 #:use-module (gnu packages gcc)
66e40e00 57 #:use-module (gnu packages gd)
97b9da68 58 #:use-module (gnu packages gtk)
b16728b0 59 #:use-module (gnu packages glib)
db7a3444 60 #:use-module (gnu packages groff)
50937297 61 #:use-module (gnu packages guile)
89984be4 62 #:use-module (gnu packages haskell)
66e40e00 63 #:use-module (gnu packages image)
97b9da68 64 #:use-module (gnu packages imagemagick)
15a3c3d4 65 #:use-module (gnu packages java)
5ded35d8 66 #:use-module (gnu packages ldc)
51c64999 67 #:use-module (gnu packages linux)
ec946638 68 #:use-module (gnu packages logging)
36742f43 69 #:use-module (gnu packages machine-learning)
db7a3444 70 #:use-module (gnu packages man)
c833ab55 71 #:use-module (gnu packages maths)
6c2b26e2 72 #:use-module (gnu packages mpi)
4e10a221 73 #:use-module (gnu packages ncurses)
81f3e0c1 74 #:use-module (gnu packages pcre)
ceb62d54 75 #:use-module (gnu packages parallel)
66e40e00 76 #:use-module (gnu packages pdf)
4e10a221
RW
77 #:use-module (gnu packages perl)
78 #:use-module (gnu packages pkg-config)
bfe3c685 79 #:use-module (gnu packages popt)
e4e5a4d8 80 #:use-module (gnu packages protobuf)
346a829a 81 #:use-module (gnu packages python)
ec946638 82 #:use-module (gnu packages readline)
9c38b540 83 #:use-module (gnu packages ruby)
84be3b99 84 #:use-module (gnu packages serialization)
c833ab55 85 #:use-module (gnu packages statistics)
d7678942 86 #:use-module (gnu packages tbb)
97b9da68 87 #:use-module (gnu packages tex)
db7a3444 88 #:use-module (gnu packages texinfo)
2127cedb 89 #:use-module (gnu packages textutils)
43c565d2 90 #:use-module (gnu packages time)
a2950fa4 91 #:use-module (gnu packages tls)
ce7155d5 92 #:use-module (gnu packages vim)
365c8153 93 #:use-module (gnu packages web)
c833ab55 94 #:use-module (gnu packages xml)
66e40e00 95 #:use-module (gnu packages xorg)
f7283db3
RW
96 #:use-module (gnu packages zip)
97 #:use-module (srfi srfi-1))
4e10a221 98
036cd0cb
RJPB
;; Package definition for the CRAN package "ape", fetched through 'cran-uri'
;; and built with the R build system.
(define-public r-ape
  (package
    (name "r-ape")
    (version "4.1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ape" version))
       (sha256
        (base32 "0959fiiy11rzfzrzaknmgrx64bhszj02l0ycz79k5a6bmpfzanlk"))))
    (build-system r-build-system)
    ;; The two R libraries this package lists as propagated inputs.
    (propagated-inputs
     (list (list "r-lattice" r-lattice)
           (list "r-nlme" r-nlme)))
    (home-page "http://ape-package.ird.fr/")
    (synopsis "Analyses of phylogenetics and evolution")
    (description
     "This package provides functions for reading, writing, plotting, and
manipulating phylogenetic trees, analyses of comparative data in a
phylogenetic framework, ancestral character analyses, analyses of
diversification and macroevolution, computing distances from DNA sequences,
and several other tools.")
    (license license:gpl2+)))
123
8dc797fa
BW
;; Package definition for ARAGORN.  The upstream tarball has no configure
;; script and no usable install target here, so the 'build' and 'install'
;; phases are replaced by hand-written ones.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Compile the single, version-named C source file directly.
         (replace 'build
           (lambda _
             (zero? (system* "gcc"
                             "-O3"
                             "-ffast-math"
                             "-finline-functions"
                             "-o"
                             "aragorn"
                             (string-append "aragorn" ,version ".c")))))
         ;; Install the binary and its manual page.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; 'install-file' creates the target directory when needed, so
               ;; no separate 'mkdir-p' call is required.
               (install-file "aragorn" bin)
               (install-file "aragorn.1" man))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
169
a12ba6e8
BW
;; Package definition for BamM (Python 2 only).  The C part is prepared with
;; autogen.sh, the regular 'build' and 'check' phases are removed, and the
;; test suite is run against the installed package instead.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method url-fetch)
              ;; BamM is not available on pypi.
              (uri (string-append
                    "https://github.com/Ecogenomics/BamM/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts. Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (substitute* "configure" (("/bin/sh") sh))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         ;; Make the installed 'bamm' script see the build-time PATH.
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               ;; Take the "major.minor" directory component from the
               ;; version of the Python package itself.  Slicing a fixed
               ;; number of characters off the end of the interpreter's
               ;; store file name only works while the full version string
               ;; is exactly five characters long.
               (setenv "PYTHONPATH"
                       (string-append
                        out
                        "/lib/python"
                        ,(version-major+minor (package-version python-2))
                        "/site-packages:"
                        (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (zero? (system* "nosetests"))))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib)
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
261
9794180d
RW
;; Package definition for BamTools, built with CMake.  An extra phase sets
;; LDFLAGS so that an rpath entry for the package's own lib/bamtools
;; directory is passed to the linker.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.4.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0jr024kcrhjb82cm69i7p5fcg5375zlc1h3qh2n1v368hcd0qflk"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' has no useful return value; report success
             ;; explicitly like the other phases in this file do.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
294
bdc7be59
MB
;; Package definition for BCFtools.  The bundled htslib is removed from the
;; source and the Makefile is pointed at the htslib input through make flags.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/bcftools/releases/download/"
             version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "095ry68vmz9q5s1scjsa698dhgyvgw5aicz24c19iwfbai07mhqj"))
       (modules '((guix build utils)))
       ;; Delete bundled htslib.
       (snippet '(delete-file-recursively "htslib-1.3.1"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:make-flags
       ;; Every htslib-related flag points into the same input directory.
       (let ((htslib-dir (assoc-ref %build-inputs "htslib")))
         (list "USE_GPL=1"
               (string-append "prefix=" (assoc-ref %outputs "out"))
               (string-append "HTSDIR=" htslib-dir "/include")
               (string-append "HTSLIB=" htslib-dir "/lib/libhts.a")
               (string-append "BGZIP=" htslib-dir "/bin/bgzip")
               (string-append "TABIX=" htslib-dir "/bin/tabix")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-Makefile
           (lambda _
             (substitute* "Makefile"
               ;; Do not attempt to build htslib.
               (("^include \\$\\(HTSDIR\\)/htslib\\.mk") "")
               ;; Link against GSL cblas.
               (("-lcblas") "-lgslcblas"))
             #t))
         (delete 'configure)
         ;; The test driver refers to /bin/bash; use the bash found in PATH.
         (add-before 'check 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     (list (list "htslib" htslib)
           (list "perl" perl)))
    (inputs
     (list (list "gsl" gsl)
           (list "zlib" zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
352
8dd4ff11
RW
;; Package definition for BEDOPS.  There is no configure script; the bundled
;; third-party tarballs are unpacked by an extra phase before the build.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; phase fails if any of the three extractions fails instead of
             ;; silently carrying on.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t)))))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
410
81de5647
RW
;; Package definition for bedtools.  There is no configure script, and the
;; 'install' phase is replaced by one that copies the contents of "bin".
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "python" python-2)))
    (inputs
     (list (list "samtools" samtools)
           (list "zlib" zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every file found under "bin" into the output's bin/.
             (let ((target (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
449
9a8f309c
RW
;; Ribotaper expects fewer columns than later bedtools releases produce, so
;; this older version is kept alongside the current one.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
464
17dc32a4
RW
;; Package definition for the RiboTaper pipeline.  It uses the older
;; bedtools-2.18 and samtools-0.1 variants together with several R packages.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     (list (list "bedtools" bedtools-2.18)
           (list "samtools" samtools-0.1)
           (list "r-minimal" r-minimal)
           (list "r-foreach" r-foreach)
           (list "r-xnomial" r-xnomial)
           (list "r-domc" r-domc)
           (list "r-multitaper" r-multitaper)
           (list "r-seqinr" r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
494
769fc6bb
RW
;; Package definition for RiboDiff (built with Python 2).  setup.py is
;; patched so that scripts/TE.py gets listed under "scripts".
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ratschlab/RiboDiff/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             (substitute* "setup.py"
               ;; Keep the matched "packages=" line and append a "scripts="
               ;; line that starts with the same leading text.
               (("^(.*)packages=.*" whole-line leading)
                (string-append whole-line "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (inputs
     (list (list "python-numpy" python2-numpy)
           (list "python-matplotlib" python2-matplotlib)
           (list "python-scipy" python2-scipy)
           (list "python-statsmodels" python2-statsmodels)))
    (native-inputs
     (list (list "python-mock" python2-mock)
           (list "python-nose" python2-nose)))
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
538
a0a71439
RJ
;; Package definition for bioawk.  There is no configure script, the build is
;; serial, and the 'install' phase is replaced by a hand-written one.
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; The manual page is installed under the name "bioawk.1",
               ;; so it is copied with 'copy-file' into a directory that is
               ;; created first.
               (mkdir-p man)
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin))
             ;; 'install-file' has no useful return value; report success
             ;; explicitly like the other phases in this file do.
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
579
a2fb1492
RW
;; Package definition for pybedtools, built with Python 2.  bedtools and
;; samtools are listed as propagated inputs.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Use the 'pypi-uri' helper, as python-biopython in this file
              ;; does, instead of spelling out the PyPI URL by hand.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
611
9e12eba8
BW
;; Package definition for the BIOM format utilities.  A 'python2-variant'
;; property points at the separately defined python2-biom-format.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.5")
    (source
     (origin
       (method url-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1n25w3p1rixbpac8iysmzcja6m4ip5r6sz19l8y6wlwi49hxn278"))))
    (build-system python-build-system)
    (propagated-inputs
     (list (list "python-numpy" python-numpy)
           (list "python-scipy" python-scipy)
           (list "python-future" python-future)
           (list "python-click" python-click)
           (list "python-h5py" python-h5py)))
    (native-inputs
     (list (list "python-nose" python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The variant is wrapped in 'delay' because python2-biom-format is
    ;; defined further down in this file.
    (properties
     (list (cons 'python2-variant (delay python2-biom-format))))))
645
;; Python 2 variant of python-biom-format.  It adds a phase that patches
;; setup.py and then appends the arguments of the converted base package.
(define-public python2-biom-format
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-base)
      (arguments
       (append
        '(#:phases
          (modify-phases %standard-phases
            ;; Do not require the unmaintained pyqi library.
            (add-after 'unpack 'remove-pyqi
              (lambda _
                (substitute* "setup.py"
                  (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                #t))))
        (package-arguments py2-base))))))
9e12eba8 660
f7283db3
RW
;; Package definition for a minimal BioPerl.  The names of the transitive
;; target inputs of the direct inputs are computed when the package is
;; defined and spliced into the build-side code, where they are looked up
;; in %build-inputs to build a PERL5LIB value for the wrapped scripts.
(define-public bioperl-minimal
  (let* ((perl-inputs
          (list (list "perl-module-build" perl-module-build)
                (list "perl-data-stag" perl-data-stag)
                (list "perl-libwww" perl-libwww)
                (list "perl-uri" perl-uri)))
         ;; Collect the transitive target inputs of every direct input,
         ;; remove duplicates, and keep only the package names.
         (transitive-input-names
          (map (lambda (entry)
                 (package-name (cadr entry)))
               (delete-duplicates
                (append-map (lambda (entry)
                              (package-transitive-target-inputs (cadr entry)))
                            perl-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; The release tag uses dashes where the version uses dots.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (ch)
                                           (if (char=? ch #\.) #\- ch))
                                         version)
                             ".tar.gz"))
         (sha256
          (base32 "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (bin-dir (string-append out "/bin/"))
                      (input-dirs (map (lambda (input-name)
                                         (assoc-ref %build-inputs input-name))
                                       ',transitive-input-names))
                      (perl5lib (string-join
                                 (cons (string-append out "/lib/perl5/site_perl")
                                       input-dirs)
                                 ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files bin-dir "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       (list (list "perl-test-most" perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
725
85c37e29
RW
;; Package definition for Biopython.  HOME is set before the 'check' phase.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.68")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "07qc7nz0k77y8hf8s18rscvibvm91zw0kkq7ylrhisf8vp8hkp6i"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     (list (list "python-numpy" python-numpy)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
85c37e29
RW
756
;; Python 2 variant, derived from python-biopython with 'package-with-python2'.
(define-public python2-biopython
  (package-with-python2
   python-biopython))
85c37e29 759
4b1a1528
BW
;; seqmagick needs an outdated version of biopython, see
;; https://github.com/fhcrc/seqmagick/issues/59
;; Remove this package once that issue has been resolved.
(define python2-biopython-1.66
  (package
    (inherit python2-biopython)
    (version "1.66")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
773
985d8411
BW
;; Package definition for the Bio++ core library, built from a pinned git
;; commit; the version string embeds the first seven characters of its hash.
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      (version (string-append "2.2.0-1." (substring commit 0 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f))
      ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of the
      ;; bpp packages with GCC 5.
      (inputs
       (list (list "gcc" gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
806
8b5f4d57
BW
;; Package definition for the Bio++ phylogenetics library, built in-source
;; from a pinned git commit.
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
    (package
      (name "bpp-phyl")
      (version (string-append "2.2.0-1." (substring commit 0 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       (list (list "bpp-core" bpp-core)
             (list "bpp-seq" bpp-seq)
             ;; GCC 4.8 fails due to an 'internal compiler error', so we use a
             ;; more modern GCC.
             (list "gcc" gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
843
159a7016
BW
(define-public bpp-popgen
  ;; Upstream has not made a release since 2014 and recommends installing
  ;; from a clone of the git repository, so a fixed commit is packaged.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((revision "1")
        (commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
    (package
      (name "bpp-popgen")
      (version (string-append "2.2.0-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f                    ; There are no tests.
         #:parallel-build? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
876
70f1bc05
BW
(define-public bpp-seq
  ;; Upstream has not made a release since 2014 and recommends installing
  ;; from a clone of the git repository, so a fixed commit is packaged.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((revision "1")
        (commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
    (package
      (name "bpp-seq")
      (version (string-append "2.2.0-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-seq")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       ;; Build inside the source tree: an out-of-source build does not copy
       ;; the test data into the build directory, and the tests then fail.
       '(#:out-of-source? #f
         #:parallel-build? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ;; Same compiler as 'bpp-core'.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
910
db7a3444
BW
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Every input needs its own label: a repeated label makes
       ;; label-based lookups (e.g. 'assoc-ref') see only the first entry.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))               ; Same compiler as the bpp libraries.
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
949
82c370de
RW
(define-public blast+
  (package
    (name "blast+")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
                    version "/ncbi-blast-" version "+-src.tar.gz"))
              (sha256
               (base32
                "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Drop the bundled copies of bzip2 and zlib ...
                  (for-each delete-file-recursively
                            '("c++/src/util/compress/bzip2"
                              "c++/src/util/compress/zlib"))
                  ;; ... and stop the Makefile from descending into them.
                  (substitute* "c++/src/util/compress/Makefile.in"
                    (("bzip2 zlib api") "api"))
                  ;; The msbuild directory is of no use here.
                  (delete-file-recursively
                   "c++/src/build-system/project_tree_builder/msbuild")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(;; This massive library has only three(!) tests, and all of them fail
       ;; with "unparsable timing stats".
       ;; ERR [127] --   [util/regexp] test_pcre.sh     (unparsable timing stats)
       ;; ERR [127] --   [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --   [serial/datatool] datatool_xml.sh     (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; The build system lives in the "c++" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "c++") #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement for the command NAME: a fixed
             ;; invocation for "date" (to keep the output deterministic),
             ;; otherwise the absolute file name found in PATH, or #f (after
             ;; printing a warning) when the command cannot be found.
             (define (absolute-command name)
               (if (string=? name "date")
                   "date -d @0"
                   (or (which name)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 name)
                         #f))))

             ;; Rewrite hardcoded paths to various tools.
             (let ((files
                    (append '("src/build-system/configure.ac"
                              "src/build-system/configure"
                              "scripts/common/impl/if_diff.sh"
                              "scripts/common/impl/run_with_lock.sh"
                              "src/build-system/Makefile.configurables.real"
                              "src/build-system/Makefile.in.top"
                              "src/build-system/Makefile.meta.gmake=no"
                              "src/build-system/Makefile.meta.in"
                              "src/build-system/Makefile.meta_l"
                              "src/build-system/Makefile.meta_p"
                              "src/build-system/Makefile.meta_r"
                              "src/build-system/Makefile.mk.in"
                              "src/build-system/Makefile.requirements"
                              "src/build-system/Makefile.rules_with_autodep.in")
                            (find-files "scripts/common/check" "\\.sh$"))))
               (substitute* files
                 (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                  ;; Leave the text untouched when the tool is unknown.
                  (or (absolute-command cmd) all))))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; The check script must not use "/var/tmp".
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; Keep the helper scripts from resetting PATH.
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         ;; $HOME needs to be set at some point during the configure phase.
         (add-before 'configure 'set-HOME
           (lambda _ (setenv "HOME" "/tmp") #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix     (assoc-ref outputs "out"))
                    (libdir     (string-append (assoc-ref outputs "lib")
                                               "/lib"))
                    (includedir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++"))
                    (build-root (string-append (getcwd) "/build")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so it is called by hand.
               (zero? (system* "./configure.orig"
                               (string-append "--with-build-root=" build-root)
                               (string-append "--prefix=" prefix)
                               (string-append "--libdir=" libdir)
                               (string-append "--includedir=" includedir)
                               (string-append "--with-bz2="
                                              (assoc-ref inputs "bzip2"))
                               (string-append "--with-z="
                                              (assoc-ref inputs "zlib"))
                               ;; By default each library is built twice:
                               ;; once with "-static" in its name and once
                               ;; without.
                               "--without-static"
                               "--with-dll"))))))))
    (outputs '("out"                    ;  19 MB
               "lib"                    ; 203 MB
               "include"))              ;  32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1096
6c2b26e2
RW
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, zlib, and .git directory
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz was deleted by the source snippet, so the
         ;; Makefile must not try to build it.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code the locations of the helper programs: kmc is installed
         ;; next to bless by the 'install' phase below, pigz is an input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Copy the two executables into the output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1181
2c7ee167
RW
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.9")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vp5db8i7is57iwjybcdg18f5ivyzlj5g1ix1nlvxainzivhz55g"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  ;; Do not depend on the return value of 'substitute*'.
                  #t))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the Perl test script against the freshly built executables
         ;; instead of the default "make check".
         (replace 'check
           (lambda _
             ;; 'system*' returns the exit status as an integer, and every
             ;; integer is true in Scheme; convert it to a boolean so that a
             ;; failing test suite actually fails the phase.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1235
94ce537e
RW
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                    version ".tar.gz"))
              (sha256
               (base32
                "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
              (patches (search-patches "tophat-build-with-later-seqan.patch"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; The bundled SeqAn and samtools are not used.
                  (for-each delete-file-recursively
                            '("src/SeqAn-1.3"
                              "src/samtools-0.1.18"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Remove the rules that build the bundled samtools.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Refer to the samtools executable found in PATH.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the samtools headers from their "samtools/"
             ;; sub-directory, for both quoted and angle-bracket includes.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1302
9a8336d8
RW
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Copy the executable, the README and the man page into the
         ;; output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1349
d29150b5
RW
(define-public bwa-pssm
  ;; Every field not overridden here (notably the build arguments and the
  ;; supported systems) is inherited from 'bwa'.
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1378
ad641d53
RW
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
                    version ".tar.gz"))
              (sha256
               (base32
                "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
              (modules '((guix build utils)))
              (snippet
               ;; Do not depend on the outdated "distribute" module: drop
               ;; both its import and its invocation from setup.py.
               '(substitute* "setup.py"
                  (("^from distribute_setup import use_setuptools") "")
                  (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; The test data are not included, so the tests fail.
       #:tests? #f))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1412
55a9a8c2
RW
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.10.0")
    (source (origin
              (method url-fetch)
              ;; The PyPi tarball lacks the test data, so fetch from GitHub.
              (uri (string-append
                    "https://github.com/pysam-developers/pysam/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1mmvn91agr238kwz7226xq0i7k84lg2nxywn9712mzj7gvgqhfy8"))
              (modules '((guix build utils)))
              (snippet
               ;; Drop bundled htslib.  TODO: Also remove samtools and bcftools.
               '(delete-file-recursively "htslib"))))
    (build-system python-build-system)
    (arguments
     '(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         ;; Point the build at the htslib input instead of the deleted
         ;; bundled copy, and set the ncurses-related flags.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR" (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1")
               #t)))
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Append the first non-hidden sub-directory of "build" to
             ;; PYTHONPATH.
             (let* ((visible? (negate (cut string-prefix? "." <>)))
                    (subdir   (car (scandir "build" visible?))))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (getcwd) "/build/" subdir)))
             ;; Leave the top-level source directory so that python does not
             ;; import from the current working directory.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               (and (zero? (system* "make" "-C" "pysam_data"))
                    (zero? (system* "make" "-C" "cbcf_data"))
                    ;; Running nosetests without explicitly asking for a
                    ;; single process leads to a crash.  Running with multiple
                    ;; processes fails because the tests are not designed to
                    ;; run in parallel.

                    ;; FIXME: tests keep timing out on some systems.
                    ;; (zero? (system* "nosetests" "-v"
                    ;;                 "--processes" "1"))
                    )))))))
    (propagated-inputs
     `(("htslib" ,htslib)))             ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; The remaining dependencies are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1490
;; Python 2 variant of 'python-pysam', derived with 'package-with-python2'.
(define-public python2-pysam
  (package-with-python2 python-pysam))
1493
4db9433a
RW
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.4")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "twobitreader" version))
              (sha256
               (base32
                "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
    (build-system python-build-system)
    ;; The PyPi release ships without the tests.
    ;; TODO: Build from the Git repository instead, or ask the upstream
    ;; maintainer to distribute the tests on PyPi.
    (arguments
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
1518
;; Python 2 variant of 'python-twobitreader', derived with
;; 'package-with-python2'.
(define-public python2-twobitreader
  (package-with-python2 python-twobitreader))
4db9433a 1521
f94bf198
RW
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.6")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "plastid" version))
              (sha256
               (base32
                "1sqkz5d3b9kf688mp7k771c87ins42j7j0whmkb49cb3fsg8s8lj"))))
    (build-system python-build-system)
    ;; The tests are disabled because some test files are not included.
    (arguments
     '(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
1554
;; Python 2 variant of 'python-plastid', derived with 'package-with-python2'.
(define-public python2-plastid
  (package-with-python2 python-plastid))
f94bf198 1557
6c1305f9
RW
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.6")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/weizhongli/cdhit"
                                  "/releases/download/V" version
                                  "/cd-hit-v" version "-2016-0711.tar.gz"))
              (sha256
               (base32
                "1w8hd4fszgg29nqiz569fldwy012la77nljcmlhglgicws56z54p"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; PREFIX is where the executables themselves are copied, so it has
       ;; to name the "bin" directory.
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         ;; Strip build dates and times from the sources; they are a source
         ;; of non-determinism.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; There is no "configure" script.
         (delete 'configure)
         ;; The "install" target expects the target directory to exist.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))
               #t))))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
1605
810cff85
RW
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/YeoLab/clipper/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; The setup-time dependency is unnecessary.
                  (substitute* "setup.py"
                    (("setup_requires = .*") ""))
                  ;; Delete the shared objects shipped in the tarball and
                  ;; the "dist/" directory.
                  (delete-file "clipper/src/peaks.so")
                  (delete-file "clipper/src/readsToWiggle.so")
                  (delete-file-recursively "dist/")
                  #t))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments `(#:python ,python-2))
    (native-inputs
     ;; All of these are needed for the tests.
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
1651
6a35566d
RS
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the documentation, the "QuarryFiles" data directory and the
         ;; two programs into the output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    ;; QUARRY_PATH is pointed at the "QuarryFiles" directory installed above.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
1691
36742f43
RW
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://couger.oit.duke.edu/static/assets/COUGER"
                           version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Neither a configure nor a build step is run; the package is
         ;; installed by copying files.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir bindir)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH: the
             ;; value of PYTHONPATH in the build environment is recorded in
             ;; the wrapper as a prefix.
             (let ((executable (string-append (assoc-ref outputs "out")
                                              "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program executable
                 (list "PYTHONPATH" ":" 'prefix (list python-path))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    ;; The source is distributed as a zip archive.
    (native-inputs
     `(("unzip" ,unzip)))
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (home-page "http://couger.oit.duke.edu")
    (license license:gpl3+)))
1762
bfe3c685
RW
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    ;; Built with the default phases of the GNU build system; the only
    ;; declared input is argtable.
    (build-system gnu-build-system)
    (inputs `(("argtable" ,argtable)))
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (home-page "http://www.clustal.org/omega/")
    (license license:gpl2+)))
1786
191c7101
RW
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
              ;; This patch has been sent upstream already and is available
              ;; for download from Sourceforge, but it has not been merged.
              (patches (search-patches "crossmap-allow-system-pysam.patch"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; remove bundled copy of pysam
                  (delete-file-recursively "lib/pysam")
                  ;; Return #t explicitly, like the other snippets in this
                  ;; file.
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       ;; Expressed with 'modify-phases', as in the rest of this file,
       ;; rather than with the older 'alist-cons-after' procedure.
       (modify-phases %standard-phases
         (add-after 'unpack 'set-env
           (lambda _
             ;; Set in the build environment right after unpacking, so that
             ;; it is visible to all later phases.
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1827
3a40a92c
RW
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32 "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-bam="
                            (assoc-ref %build-inputs "samtools")))
       #:make-flags
       (list
        ;; The includes for "eigen" are located in a subdirectory.
        (string-append "EIGEN_CPPFLAGS="
                       "-I" (assoc-ref %build-inputs "eigen")
                       "/include/eigen3/")
        ;; Cufflinks must be linked with various boost libraries.
        "LDFLAGS=-lboost_system -lboost_serialization -lboost_thread")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             ;; Rewrite the "bam/" include prefix to "samtools/" in the
             ;; autoconf macro, the generated configure script and the
             ;; header that includes these files.
             (substitute* '("ax_bam.m4"
                            "configure"
                            "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (license license:boost1.0)))
1885
8e913213
RW
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.12")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "19smhh6444ikn4jlmyhvffw4m5aw7yg07rqsk7arg8dkwyga1i4v"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests must be run after installation.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Derive the "X.Y" part of the Python version from the file
             ;; name of the "python" input instead of cutting it out at
             ;; fixed character offsets, which only works when every version
             ;; component is a single digit.
             ;; NOTE(review): like the previous code, this assumes that the
             ;; file name of the "python" input ends in "-<version>".
             (let* ((python (assoc-ref inputs "python"))
                    (full-version
                     (car (last-pair (string-split python #\-))))
                    (major+minor
                     (string-join
                      (list-head (string-split full-version #\.) 2)
                      ".")))
               ;; Make the freshly installed modules visible to the tests.
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (assoc-ref outputs "out")
                        "/lib/python" major+minor
                        "/site-packages"))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (inputs
     `(("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1927
1baee943
RW
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       ;; The tests are run with "make test".
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'disable-curl-test
           (lambda _
             ;; Drop the line that runs "testRemote" from the Makefile.
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
         ;; there has not yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (directory)
                           (mkdir-p (string-append prefix directory)))
                         '("/lib" "/include"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (home-page "https://github.com/dpryan79/libBigWig")
    (license license:expat)))
1973
69e0e03c
RW
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyBigWig" version))
              (sha256
               (base32
                "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled libBigWig sources
                  (delete-file-recursively "libBigWig")
                  ;; Return #t explicitly, like the other snippets in this
                  ;; file.
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           ;; The bundled sources are removed by the snippet above, so add
           ;; "BigWig" to the list of libraries in setup.py; the library
           ;; itself comes from the "libbigwig" input.
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))

;; Variant of python-pybigwig produced by 'package-with-python2'.
(define-public python2-pybigwig
  (package-with-python2 python-pybigwig))
69e0e03c 2011
ec2a67de
BW
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.2.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "DendroPy" version))
              (sha256
               (base32
                "15c7s3d5gf19ljsxvq5advaa752wfi7pwrdjyhzmg85hccyvp47p"))
              (patches
               (search-patches "python-dendropy-fix-tests.patch"))))
    (build-system python-build-system)
    ;; python2-dendropy is defined separately; the promise below refers to
    ;; it without forcing it while this package is being defined.
    (properties `((python2-variant . ,(delay python2-dendropy))))
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (home-page "http://packages.python.org/DendroPy/")
    (license license:bsd-3)))
2033
(define-public python2-dendropy
  ;; Start from the automatically derived Python 2 package and override its
  ;; arguments and native inputs.
  (let ((python2-base
         (package-with-python2 (strip-python2-variant python-dendropy))))
    (package
      (inherit python2-base)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           (replace 'check
             ;; There is currently a test failure that only happens on some
             ;; systems, and only using "setup.py test"
             (lambda _
               (zero? (system* "nosetests")))))))
      ;; "nosetests" is run by the check phase above.
      (native-inputs
       (cons `("python2-nose" ,python2-nose)
             (package-native-inputs python2-base))))))
2048
2049
1921b1de
RW
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1nmfin0zjdby3vay3r4flvz94dr6qjhj41ax4yz3vx13j6wz8izd"))))
    (build-system python-build-system)
    ;; Built with Python 2; all Python inputs below are the python2-*
    ;; variants.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-numpydoc" ,python2-numpydoc)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)
       ("python-pybigwig" ,python2-pybigwig)))
    ;; All of the native inputs are only needed for the tests.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (home-page "https://github.com/fidelram/deepTools")
    (license license:gpl3+)))
2089
684bf7c7
BW
(define-public diamond
  (package
    (name "diamond")
    (version "0.8.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0q2z6z5f7c0kbbzpjamkcyqg0rc6h5rxfp97qbmb0wxaycr7jajq"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             ;; Strip the "-march=native" flag from the CMake build
             ;; description.
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (home-page "https://github.com/bbuchfink/diamond")
    ;; The license terms are the ones found in "src/COPYING".
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
2124
97b9da68
RW
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/maaskola/discrover/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'add-missing-includes
           (lambda _
             ;; Insert "#include <random>" right after the include guard of
             ;; each of these two headers.
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (license license:gpl3+)))
2163
6619f9c7
RW
(define-public eigensoft
  ;; The package is built from a specific commit; the version string is made
  ;; of the base version, the revision and the first nine characters of the
  ;; commit.
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-"
                              revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32 "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Remove pre-built binaries.
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags '("CC=gcc")
         #:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (executable)
                             (install-file executable bindir))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      (home-page "https://github.com/DReichLab/EIG")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
2231
365c8153
RW
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source (origin
              (method url-fetch)
              ;; The download location is identified by a date rather than
              ;; by the version string.
              (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                                  "versions/2016-05-03/edirect.tar.gz"))
              (sha256
               (base32
                "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               ;; 'install-file' creates TARGET when needed, so no separate
               ;; 'mkdir-p' call is required.
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path)))
               ;; Signal success explicitly, like the other 'wrap-program'
               ;; phases in this file.
               #t))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2296
b16728b0
BW
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
                    "exonerate-" version ".tar.gz"))
              (sha256
               (base32
                "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    ;; Building in parallel fails on some machines.
    (arguments
     `(#:parallel-build? #f))
    (inputs
     `(("glib" ,glib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (synopsis "Generic tool for biological sequence alignment")
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using a many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (license license:gpl3)))
2326
e4e5a4d8
RW
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri
               (string-append
                "http://bio.math.berkeley.edu/eXpress/downloads/express-"
                version "/express-" version "-src.tgz"))
              (sha256
               (base32
                "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       ;; Expressed with 'modify-phases', as in the rest of this file,
       ;; rather than with the older 'alist-cons-after' procedure.
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Turn off static linking with Boost and replace the
               ;; references to a "bamtools" directory inside the source
               ;; tree with the directories of the "bamtools" input.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2371
f3674b1c
BW
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The build and check phases run inside the "source" directory;
         ;; the working directory is restored before installing.
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (file)
                           (install-file file bindir))
                         '("scripts/convertToEBD.py"
                           "bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (license license:gpl3+)))
2414
12b04cbe
BW
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.9")
    ;; The source is a single C file.
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.microbesonline.org/fasttree/FastTree-"
                           version ".c"))
       (sha256
        (base32 "0ljvvw8i1als1wbfzvrf15c3ii2vw9db20a259g6pzg34xyyb97k"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE to the executable OUTPUT.  EXTRA-FLAGS are
             ;; passed to gcc ahead of the flags shared by both builds.
             (define (build-binary output . extra-flags)
               (zero? (apply system* "gcc"
                             (append extra-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o" output
                                           source
                                           "-lm")))))
             ;; Build the plain executable first, then the OpenMP one; the
             ;; second build only runs when the first one succeeded.
             (and (build-binary "FastTree")
                  (build-binary "FastTreeMP" "-DOPENMP" "-fopenmp"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (executable)
                           (install-file executable bindir))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (home-page "http://www.microbesonline.org/fasttree")
    (license license:gpl2+)))
2471
2127cedb
RW
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       ;; The version appears twice in the URL: once as a directory and
       ;; once in the archive name.
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (license license:agpl3+)))
2503
d7678942
RW
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       ;; Expressed with 'modify-phases', as in the rest of this file,
       ;; rather than with the older 'alist-replace' and 'alist-delete'
       ;; procedures.
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the "bin" directory of the output on PATH before
             ;; running the validation script.
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The name of the source directory is derived from the
             ;; package version so that it follows version updates.
             (chdir ,(string-append "../flexbar_v" version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         ;; No separate install step is run.
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2548
19f4554c
BW
;; FragGeneScan: gene prediction in short, error-prone reads.  Upstream has no
;; configure script and no test target, so the phases below patch hard-coded
;; paths, build the "fgs" target, install by hand and run one example.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/fraggenescan/"
                           "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (data-dir (string-append prefix "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Call rm and mv by absolute file name.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; The script and the other programs look for the training
                 ;; files in the non-standard location bin/train/XXX; use
                 ;; share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 data-dir
                                 "train/\".$FGS_train_file;")))
               ;; NOTE(review): the whitespace in this pattern and its
               ;; replacement has to match the indentation used in run_hmm.c;
               ;; confirm against the upstream source.
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" data-dir "/train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" data-dir "\";"))))
             #t))
         (replace 'build
           ;; Start from a clean tree, then build only the "fgs" target.
           (lambda _
             (and (zero? (system* "make" "clean"))
                  (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (train-dir (string-append prefix
                                              "/share/fraggenescan/train")))
               ;; Scripts and the compiled program all go to bin/.
               (for-each (lambda (file)
                           (install-file file bindir))
                         '("run_FragGeneScan.pl"
                           "FragGeneScan"
                           "FGS_gff.py"
                           "post_process.pl"))
               (copy-recursively "train" train-dir))))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the bundled examples with
           ;; the installed script and verify that the expected output files
           ;; were created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (script (string-append bindir "run_FragGeneScan.pl")))
               (and (zero? (system* script
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2629
81f3e0c1
BW
;; fxtract: extract sequences from FASTA/FASTQ files by subsequence or
;; pattern.  The build needs the author's separate 'util' repository, which is
;; fetched as an extra origin and copied into the source tree before building.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the downloaded tarball after this package.  It was previously
         ;; given the name of the 'util' checkout, which is a different
         ;; origin (see native-inputs below).
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Replace the 'util' directory of the unpacked tree with the
               ;; separately fetched checkout.  rmdir only succeeds on an
               ;; empty directory, so 'util' is presumably an empty
               ;; placeholder in the tarball -- confirm.
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctstennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2696
5854f685
RW
;; GRIT: transcript model building and quantification from RNA-seq type
;; assays.  Python 2 package whose C sources are regenerated from the Cython
;; (.pyx) files at build time.
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Remove the pre-generated C files so that they are produced
             ;; afresh from the pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (let ((numpy (assoc-ref inputs "python-numpy")))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Pass numpy's header directory to the extension so that
                 ;; compilation succeeds.
                 (("pyx\", \\]")
                  (string-append
                   "pyx\", ], include_dirs = ['"
                   numpy
                   "/lib/python2.7/site-packages/numpy/core/include/"
                   "']"))))
             #t)))))
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2746
346a829a
RW
;; HISAT: spliced aligner for RNA-seq reads.  Upstream ships a plain Makefile
;; inside a zip archive: there is no configure script and no check target, and
;; the programs are installed by hand.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                    ;; Every hisat program variant found in the build tree.
                    (programs
                     (find-files
                      "."
                      "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs))))
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX This cannot be done in an origin snippet because zip
             ;; files are not supported there.
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; Fixed BUILD_HOST and BUILD_TIME values make the build
               ;; deterministic.
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env"))))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2812
e84efc50
RW
;; HISAT2: graph-based aligner for DNA and RNA reads.  Built from a GitHub
;; snapshot with a plain Makefile; programs and the HTML manual are installed
;; by hand.
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       ;; FIXME: a better source URL is
       ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
       ;; "/downloads/hisat2-" version "-source.zip")
       ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
       ;; but it is currently unavailable.
       (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       ;; srfi-26 provides 'cut', used in the install phase.
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Replace the `date` invocation in the Makefile with a constant.
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    ;; Every hisat2 program variant found in the build tree.
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bindir) programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip) ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc))) ; for documentation
    (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later. The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
2875
c684629f
BW
;; HMMER: profile hidden Markov model tools for biosequence analysis, built
;; with the GNU build system from the patched upstream tarball.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is named after the first component of the
       ;; version, as returned by 'version-prefix'.
       (uri (string-append "http://eddylab.org/software/hmmer"
                           (version-prefix version 1) "/"
                           version "/hmmer-" version ".tar.gz"))
       (patches (search-patches "hmmer-remove-cpu-specificity.patch"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (home-page "http://hmmer.org/")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
2906
85652f59
RW
;; HTSeq: Python 2 package for processing high-throughput sequencing data,
;; fetched from PyPI.
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-pysam" ,python2-pysam)))
    ;; Numpy has to be propagated for the case where htseq is used as a
    ;; Python library.
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (license license:gpl3+)))
2932
;; HTSJDK: Java library for high-throughput sequencing file formats, built
;; with Ant.  The jars are written straight into the output directory by the
;; build target, so no separate install phase is needed.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; Remove the build dependency on git: flip
              ;; failifexecutionfails to "false" in build.xml so that a
              ;; command that cannot be executed does not abort the build.
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Internet access
       #:make-flags
       ;; Have Ant place its distribution files under share/java/htsjdk/ of
       ;; the output.
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
2970
e7c09730
RW
;; HTSlib: C library (plus bgzip, htsfile and tabix) for high-throughput
;; sequencing data, built with the GNU build system.
(define-public htslib
  (package
    (name "htslib")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test driver refers to /bin/bash; substitute the bash
             ;; found on PATH.
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    (home-page "http://www.htslib.org")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
3005
c4325f62
RW
;; IDR: irreproducible discovery rate measurement for replicate experiments.
;; Python package built from the GitHub release archive; tests are disabled.
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    ;; FIXME: the tests fail with "ImportError: No module named 'utility'".
    (arguments
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (home-page "https://github.com/nboley/idr")
    (license license:gpl3+)))
3036
43c565d2
RW
;; Jellyfish: k-mer counter for DNA sequences.  The Ruby and Python bindings
;; are installed into their own outputs.
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    (outputs '("out"      ;for library
               "ruby"     ;for Ruby bindings
               "python")) ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Each binding is enabled with its own output as the target prefix.
       (let ((ruby-out (assoc-ref %outputs "ruby"))
             (python-out (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby-out)
               (string-append "--enable-python-binding=" python-out)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp runs tests with either /bin/sh or
             ;; $SHELL, so point $SHELL at bash.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; From their website: JELLYFISH runs on 64-bit Intel-compatible processors
    (supported-systems '("x86_64-linux"))
    ;; The combined work is published under the GPLv3 or later. Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
3087
94ff3157
BW
;; khmer: k-mer counting, filtering and graph traversal.  Bundled zlib, bzip2
;; and seqan are removed in favour of the packaged versions, and the test
;; suite is run against the installed package.
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (sha256
        (base32
         "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
       (patches (search-patches "khmer-use-libraries.patch"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Drop the bundled copies of zlib and bzip2.
             (for-each delete-file-recursively
                       '("third-party/zlib"
                         "third-party/bzip2"))
             ;; Swap the bundled seqan for the headers of the seqan input,
             ;; placed where the build expects them.
             (let* ((bundled "third-party/seqan")
                    (target (string-append bundled "/core/include" "/seqan"))
                    (packaged (string-append (assoc-ref inputs "seqan")
                                             "/include/seqan")))
               (delete-file-recursively bundled)
               (copy-recursively packaged target))
             ;; We do not replace the bundled MurmurHash as the canonical
             ;; repository for this code 'SMHasher' is unsuitable for
             ;; providing a library. See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; It is simpler to test after installation.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Take the last five characters of the python input's
                    ;; file name and keep the first three of those;
                    ;; presumably this yields MAJOR.MINOR from a name ending
                    ;; in MAJOR.MINOR.PATCH -- confirm.
                    (python-version
                     (string-take (string-take-right
                                   (assoc-ref inputs "python") 5)
                                  3)))
               ;; Make the freshly installed scripts and modules visible to
               ;; the test run.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":"
                                      out
                                      "/lib/python"
                                      python-version
                                      "/site-packages"))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)
       ;; Tests fail when gcc-5 is used for compilation. Use gcc-4.9 at least
       ;; until the next version of khmer (likely 2.1) is released.
       ("gcc" ,gcc-4.9)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux"))
    (license license:bsd-3)))
3171
d57e6d0f
RW
;; MACS: model-based analysis of ChIP-Seq data.  Python 2 package fetched
;; from PyPI under the project name "MACS2".
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32
         "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f           ; no test target
       #:python ,python-2))  ; only compatible with Python 2.7
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (home-page "https://github.com/taoliu/MACS/")
    (license license:bsd-3)))
3198
41ddebdd
BW
;; MAFFT: multiple sequence alignment.  The build happens in the "core"
;; subdirectory of the tarball; the Makefile is edited to drop a few
;; specialised tools, tool names are replaced by absolute file names, and the
;; installed programs are wrapped so that coreutils is on their PATH.
(define-public mafft
  (package
    (name "mafft")
    (version "7.310")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0gbsaz6z2qa307kd7wfb06c3y4ikmv1hsdvlns11f6zq4w1z9pwc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name the
               ;; same neighbouring programs as the replacement ("f2cl", not
               ;; "2cl"), otherwise it never matches and mafft-profile stays
               ;; in PROGS.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; Comment out the clean-up of the programs removed above.
               ;; NOTE(review): these patterns are anchored at the start of
               ;; the line; if the Makefile indents these commands with a tab
               ;; they will not match -- confirm against the upstream file.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Refer to perl, awk and grep by absolute file name.  For awk
             ;; the preceding delimiter character is captured and kept.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref %build-inputs "coreutils") "/bin:")))
               ;; Prefix PATH with coreutils for every installed program.
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
8fd790eb 3284
84be3b99
MB
;; Mash: MinHash-based genome and metagenome distance estimation.  The bundled
;; kseq.h is deleted from the source and the copy shipped with htslib is
;; included instead; the configure script is generated with autoconf.
(define-public mash
  (package
    (name "mash")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marbl/mash/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete bundled kseq.
        ;; TODO: Also delete bundled murmurhash and open bloom filter.
        '(delete-file "src/mash/kseq.h"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; No tests.
       #:configure-flags
       ;; Tell configure where capnproto and gsl live.
       (let ((capnp (assoc-ref %build-inputs "capnproto"))
             (gsl (assoc-ref %build-inputs "gsl")))
         (list (string-append "--with-capnp=" capnp)
               (string-append "--with-gsl=" gsl)))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; Include kseq.h from htslib in place of the deleted bundled
             ;; copy.
             (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-before 'configure 'autoconf
           ;; Generate the configure script.
           (lambda _
             (zero? (system* "autoconf")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application. Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3340
(define-public metabat
  ;; We package from a git commit because compilation of the released version
  ;; fails.
  (let ((commit "cbdca756993e66ae57e50a27970595dda9cbde1b"))
    (package
      (name "metabat")
      (version (string-append "0.32.4-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://bitbucket.org/berkeleylab/metabat.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "0byia8nsip6zvc4ha0qkxkxxyjf4x7jcvy48q2dvb0pzr989syzr"))
         (patches (search-patches "metabat-remove-compilation-date.patch"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Point the sources at the headers of the packaged samtools and
           ;; htslib instead of the "bam/" directory named in the sources.
           (add-after 'unpack 'fix-includes
             (lambda _
               (substitute* "src/BamUtils.h"
                 (("^#include \"bam/bam\\.h\"")
                  "#include \"samtools/bam.h\"")
                 (("^#include \"bam/sam\\.h\"")
                  "#include \"samtools/sam.h\""))
               (substitute* "src/KseqReader.h"
                 (("^#include \"bam/kseq\\.h\"")
                  "#include \"htslib/kseq.h\""))
               #t))
           ;; Rewrite the directory variables in SConstruct so that they
           ;; refer to the corresponding inputs.
           (add-after 'unpack 'fix-scons
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* "SConstruct"
                 ;; The variable must keep its name "htslib_dir"; writing
                 ;; any other name would leave it undefined for the rest of
                 ;; the script.
                 (("^htslib_dir = 'samtools'")
                  (string-append "htslib_dir = '"
                                 (assoc-ref inputs "htslib")
                                 "'"))
                 ;; samtools_dir refers to the samtools input, not htslib.
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '"
                                 (assoc-ref inputs "samtools")
                                 "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 (assoc-ref inputs "samtools")
                                 "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
               #t))
           (delete 'configure)
           (replace 'build
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; The output directory is created before scons is asked to
               ;; install into it.
               (mkdir (assoc-ref outputs "out"))
               (zero? (system* "scons"
                               (string-append
                                "PREFIX="
                                (assoc-ref outputs "out"))
                               (string-append
                                "BOOST_ROOT="
                                (assoc-ref inputs "boost"))
                               "install"))))
           ;; Check and install are carried out during build phase.
           (delete 'check)
           (delete 'install))))
      (inputs
       `(("zlib" ,zlib)
         ("perl" ,perl)
         ("samtools" ,samtools)
         ("htslib" ,htslib)
         ("boost" ,boost)))
      (native-inputs
       `(("scons" ,scons)))
      (home-page "https://bitbucket.org/berkeleylab/metabat")
      (synopsis
       "Reconstruction of single genomes from complex microbial communities")
      (description
       "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
      (license (license:non-copyleft "file://license.txt"
                                     "See license.txt in the distribution.")))))
8fd790eb 3426
318c0aee
MB
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ctSkennerton/minced/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; The expected test output still names the previous release;
             ;; update it to the version being built.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         ;; The Makefile has no install target.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (launcher (string-append bindir "/minced")))
               ;; Minced comes with a wrapper script that tries to figure
               ;; out where it is located before running the JAR.  Since
               ;; these paths are known to us, we build our own wrapper to
               ;; avoid coreutils dependency.
               (install-file "minced.jar" bindir)
               (call-with-output-file launcher
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bindir "/minced.jar \"$@\"\n")
                    port)))
               (chmod launcher #o555)))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
3482
ddd82e0e
RW
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/"
             "misopy-" version ".tar.gz"))
       (sha256
        (base32 "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Without setuptools the executables are not installed.
           (("distutils.core") "setuptools")
           ;; Make the compiler object invoke "gcc" rather than "cc".
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)))  ;for tests
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
3531
324efb88
BW
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (sha256
        (base32 "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; There is no test suite; the check merely verifies that the
         ;; freshly built binary can be executed.
         (replace 'check
           (lambda _
             (zero? (system* "./muscle" "-version"))))
         ;; Copy the single executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
3565
99268755
BW
(define-public newick-utils
  ;; There are no recent releases so we package from git.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Generate the build system with autoreconf right after
           ;; unpacking the checkout.
           (add-after 'unpack 'autoconf
             (lambda _
               (zero? (system* "autoreconf" "-vif")))))))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
3604
1e44cf8b
BW
(define-public orfm
  (package
    (name "orfm")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs live under a "v"-prefixed tag directory.
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "19hwp13n82isdvk16710l9m35cmzf0q3fsrcn3r8c5r67biiz39s"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
3629
19ee9201
RW
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     ;; pbcore requires Python 2.7
     `(#:python ,python-2))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
3657
c61fe02c
RW
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/"
             "WarpedLMM-" version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     ;; requires Python 2.7
     `(#:python ,python-2))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ;; The source is distributed as a zip archive.
       ("unzip" ,unzip)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
3692
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the Cython tarball shipped with the sources.
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; The Python package lives in a subdirectory of the checkout.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; With setuptools version 18.0 and later this setup.py hack causes
           ;; a build error, so we disable it.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
3749
024130d2
BW
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://wasabiapp.org/download/prank/"
                           "prank.source." version ".tgz"))
       (sha256
        (base32 "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; This phase is also inserted directly after 'unpack, so it runs
         ;; before 'enter-src-dir; that is why the Makefile is addressed
         ;; relative to the top of the source tree.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (mandir (string-append out "/share/man/man1"))
                    ;; Colon-separated list of the bin directories of the
                    ;; programs put on the PATH of the wrapped executable.
                    (tool-path
                     (string-join
                      (map (lambda (input)
                             (string-append (assoc-ref %build-inputs input)
                                            "/bin"))
                           '("mafft" "exonerate" "bppsuite"))
                      ":")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,tool-path)))
               (install-file "prank.1" mandir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
3807
31a9d653
BW
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "5.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/"
             "proteinortho_v" version "_src.tar.gz"))
       (sha256
        (base32 "0z4f5cg0cs8ai62hfvp4q6w66q2phcc55nhs4xj5cyhxxivjv2ai"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script, so the installation directory is
         ;; written straight into the Makefile.
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         ;; 'make install' does not create the install directory itself.
         (add-before 'install 'make-install-directory
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         ;; Prefix the PATH of the installed script with the PATH that is
         ;; in effect during the build.
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((build-path (getenv "PATH"))
                   (script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl")))
               (wrap-program script
                 `("PATH" ":" prefix (,build-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
3859
846e3409
RW
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://bitbucket.org/regulatorygenomicsupf/pyicoteo/get/v"
             version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32 "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently six
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
 region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
3897
af860475
BW
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/hyattpd/Prodigal/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; Pass the output's bin directory to make as INSTALLDIR.
       #:make-flags
       (list (string-append "INSTALLDIR=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
608dd932 3928
ceb62d54
BW
(define-public roary
  (package
    (name "roary")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x2hpb3nfsc6x2nq1788w0fhqfzc7cn2dp4xwyva9m3k6xlz0m43"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Every test file is run (no short-circuit); the phase
             ;; succeeds only when the list of failing files is empty.
             (zero? (length (filter (lambda (file)
                                      (display file)(display "\n")
                                      (not (zero? (system* "perl" file))))
                                    (find-files "t" ".*\\.t$"))))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; The file names are collected from the "bin" directory of
               ;; the build tree and then resolved relative to the output,
               ;; where 'install copied them.  Files whose name ends in "R"
               ;; are excluded here and handled separately below.
               (for-each (lambda (prog)
                           (let ((binary (string-append out "/" prog)))
                             (wrap-program binary
                               `("PERL5LIB" ":" prefix
                                 (,(string-append perl5lib ":" out
                                                  "/lib/perl5/site_perl"))))
                             (wrap-program binary
                               `("PATH" ":" prefix
                                 (,(string-append path ":" out "/bin"))))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets R_LIBS_SITE and a PATH made of
               ;; coreutils plus the output's own bin directory.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/"))))
                 (wrap-program file
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
4043
608dd932
BW
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.10")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Provide the program under the shorter name "raxml" as
               ;; well.  The link has to be created inside the output's bin
               ;; directory; a bare "raxml" would place it in the build
               ;; directory, which is discarded after the build.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    (license license:gpl2+)))
af860475 4083
66e3eff1
RW
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://deweylab.biostat.wisc.edu/rsem/src/"
                           "rsem-" version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the copy of boost shipped with the sources.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  Use this phase to keep the
         ;; bundled samtools library from being built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               ;; Install every file matching "rsem-.*" found in the build
               ;; tree, plus the Perl helper module.
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         ;; Let the listed programs find the Perl module installed above.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/bin/" prog)
                             `("PERL5LIB" ":" prefix (,perldir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
4159
8622a072
RW
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/RSeQC-"
                           version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the copy of pysam shipped with the sources.
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; The outdated "distribute" module is not needed.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Make setup.py treat pysam as already available so that the
             ;; bundled copy is not used.
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
4201
ec946638
RW
(define-public seek
  ;; Upstream publishes no release tarballs.  The installation instructions
  ;; at http://seek.princeton.edu/installation.jsp designate this changeset
  ;; ID as the latest stable release.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source (origin
                (method hg-fetch)
                (uri (hg-reference
                      (url "https://bitbucket.org/libsleipnir/sleipnir")
                      (changeset changeset)))
                (sha256
                 (base32
                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of "tools/" that are built and installed in
         ;; addition to the top-level targets.
         (let ((tool-dirs '("SeekMiner"
                            "SeekEvaluator"
                            "SeekPrep"
                            "Distancer"
                            "Data2DB"
                            "PCL2Bin")))
           (modify-phases %standard-phases
             ;; Generate the autotools files before 'configure' runs.
             (add-before 'configure 'bootstrap
               (lambda _
                 (zero? (system* "bash" "gen_auto"))))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 ;; Stop at the first tool whose "make" fails.
                 (every (lambda (tool)
                          (with-directory-excursion
                              (string-append "tools/" tool)
                            (zero? (apply system* "make" make-flags))))
                        tool-dirs)))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 ;; Same traversal as above, running "make ... install".
                 (every (lambda (tool)
                          (with-directory-excursion
                              (string-append "tools/" tool)
                            (zero? (apply system* "make"
                                          (append make-flags
                                                  '("install"))))))
                        tool-dirs)))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
4270
4e10a221
RW
(define-public samtools
  (package
    (name "samtools")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "0znnnxc467jbf1as2dpskrjhfh8mbll760j6w6rdkwlwbqsp8gbc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags (list "--with-ncurses")
       #:phases
       ;; Packages inheriting from this one (see samtools-0.1) extend these
       ;; phases with 'modify-phases', so the phase names must stay stable.
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash")))
             #t))
         ;; Both phases below are inserted right after 'install'; the one
         ;; added last runs first, so 'install-library' runs before
         ;; 'install-headers'.
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((include (string-append (assoc-ref outputs "out")
                                           "/include/samtools/")))
               ;; Install every header found in the top-level source
               ;; directory.
               (for-each (lambda (file)
                           (install-file file include))
                         (scandir "." (lambda (name)
                                        (string-match "\\.h$" name))))
               #t)))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
               (install-file "libbam.a" lib)
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda 4329
0b84a0aa
RW
(define-public samtools-0.1
  ;; Most recent release of the samtools 0.1 series.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and it is still used
  ;; in many bioinformatics pipelines.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f ;no "check" target
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:make-flags flags)
            `(cons "LIBCURSES=-lncurses" ,flags))
           ((#:phases phases)
            `(modify-phases ,phases
               ;; Phases inherited from samtools that this package removes.
               (delete 'configure)
               (delete 'patch-tests)
               ;; Copy the "samtools" executable into the output by hand.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t))))))))))
0b84a0aa 4360
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source (origin
                ;; Upstream provides neither release tarballs nor tags, so
                ;; the sources are fetched by commit.
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/wanpinglee/MOSAIK.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, just enter "src" so
           ;; that the following phases run there.
           (replace 'configure
             (lambda _ (chdir "src") #t))
           ;; Copy everything from "../bin" (relative to "src") into the
           ;; output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK itself is GPLv2+; the following third-party code is in the
      ;; public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
4407
282c5087
RW
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.3.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       ;; java-ngs inherits these arguments and replaces the 'enter-dir
       ;; phase, so that phase name must stay stable.
       (modify-phases %standard-phases
         ;; Build from the "ngs-sdk" sub-directory of the source tree.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system* "./configure"
                               (string-append "--build-prefix=" (getcwd) "/build")
                               (string-append "--prefix=" out)))))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
4453
;; Java bindings built from the same source tree as ngs-sdk; only the
;; directory entered after unpacking differs.
(define-public java-ngs
  (package (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     (substitute-keyword-arguments
         ;; Prepend a #:modules list to the arguments inherited from
         ;; ngs-sdk.
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _ (chdir "ngs-java") #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
4471
75dd2424
RW
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Add -msse2 to the compiler flags.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need
               ;; to use the Linux architecture name ("i386") instead of
               ;; the target system prefix ("i686").
               (mkdir (string-append out "/ilib"))
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 (string-append out "/ilib"))
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir target)
               (install-file "libs/kfg/default.kfg" target)
               (install-file "libs/kfg/certs.kfg" target))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
4573
cc6ed477
RW
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; An empty value turns off the "phone home" web check.
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; The sources come without a "configure" script.
         (delete 'configure)
         ;; Copy the "plink" executable into the output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; The code is GPLv2, with the exception of fisher.h, which is
    ;; LGPLv2.1+.
    (license (list license:gpl2 license:lgpl2.1+))))
4626
c6a24d6e
RW
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; Refer to the header as "samtools/sam.h".  The dot is
               ;; escaped so that only the literal file name "sam.h" is
               ;; rewritten.
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           ;; Install the compiled object files under lib/ and the headers
           ;; under include/smithlab-cpp.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (lib (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
4679
56e373ef
RW
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/"
                                  "preseq/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
              (modules '((guix build utils)))
              ;; Delete the samtools copy that ships with the sources.
              (snippet
               '(delete-file-recursively "samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         ;; Point the Makefile at the packaged libbam.a and smithlab-cpp.
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
4728
9ded1457
BW
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Append the current directory to PYTHONPATH, then run nose
             ;; while skipping tests tagged "known_failing".
             (let ((search-path (string-append (getenv "PYTHONPATH") ":.")))
               (setenv "PYTHONPATH" search-path)
               (zero? (system* "nosetests"
                               "--attr" "!known_failing"))))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
4759
;; Python 2 variant, derived from python-screed with package-with-python2.
(define-public python2-screed
  (package-with-python2 python-screed))
9ded1457 4762
51c64999
RW
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.8.2-1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:make-flags
       ;; All three variables point into the ncbi-vdb input.
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               (string-append "VDB_LIBDIR=" vdb
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         ;; This version of sra-tools fails to build with glibc because of
         ;; a naming conflict: glibc-2.25/include/bits/mathcalls.h already
         ;; contains a definition of "canonicalize", so the one in
         ;; bam-loader is renamed.
         ;;
         ;; Upstream bug report:
         ;; https://github.com/ncbi/sra-tools/issues/67
         (add-after 'unpack 'patch-away-glibc-conflict
           (lambda _
             (substitute* "tools/bam-loader/bam.c"
               (("canonicalize\\(" line)
                (string-append "sra_tools_" line)))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((vdb (assoc-ref inputs "ncbi-vdb")))
               ;; The build system expects a directory holding the sources
               ;; and raw build output of ncbi-vdb, including files that
               ;; are not installed.  We build against an installed
               ;; ncbi-vdb instead, hence the modifications below.
               (substitute* "setup/konfigure.perl"
                 ;; Have the configure script look into the "ilib"
                 ;; directory of "ncbi-vdb" without first checking for a
                 ;; matching library in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Search ncbi-vdb's "ilib" directory for interface
                 ;; libraries.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Dynamic linking
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" vdb)
                       (string-append "--with-ncbi-vdb-build=" vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
4866
d3517eda
RW
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; With 7.8MB of documentation and 3.6MB of headers it pays off to
    ;; split them into separate outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tar-dir (assoc-ref %build-inputs "tar"))
                (bzip2-dir (assoc-ref %build-inputs "bzip2"))
                (tarball (assoc-ref %build-inputs "source"))
                (out (assoc-ref %outputs "out"))
                (doc (assoc-ref %outputs "doc")))
           ;; Make tar and bzip2 available for unpacking the tarball.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip2-dir "/bin"))
           (system* "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", the "share" tree goes to "doc".
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5 4909
d708b7a9
BW
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       ;; Let 'pypi-uri' compute the download location, as done for the
       ;; other PyPI packages in this file, instead of spelling out the
       ;; PyPI URL by hand.
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32
         "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
    (build-system python-build-system)
    (arguments
     ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Current test in setup.py does not work as of 0.6.1,
         ;; so use nose to run tests instead for now. See
         ;; https://github.com/fhcrc/seqmagick/issues/55
         (replace 'check (lambda _ (zero? (system* "nosetests")))))))
    (inputs
     ;; biopython-1.66 is required due to
     ;; https://github.com/fhcrc/seqmagick/issues/59
     ;; When that issue is resolved the 'python2-biopython-1.66' package
     ;; should be removed.
     `(("python-biopython" ,python2-biopython-1.66)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
4950
66daf78c
BW
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/seqtk/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove extraneous header files, as is done in the seqtk
                  ;; master branch.
                  (for-each delete-file
                            (list "ksort.h" "kstring.h" "kvec.h"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.
           (lambda _ (zero? (system* "./seqtk" "seq"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               ;; Signal success explicitly rather than relying on the
               ;; return value of 'install-file'.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
4993
5f7e17be
BW
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The test suite is the "unit_tests" executable.
         (replace 'check (lambda _ (zero? (system* "./unit_tests"))))
         ;; Copy the two executables into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("snap-aligner" "SNAPCommand"))
               #t))))))
    (native-inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; The unpatched code does not support 32-bit systems.  Moreover, the
    ;; bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 show that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
5034
bcadaf00
BW
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    (outputs '("out"      ;for binaries
               "db"))     ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (dbdir (string-append (assoc-ref outputs "db")
                                          "/share/sortmerna/rRNA_databases")))
               ;; The executables go to the "out" output ...
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               ;; ... and the FASTA files found under "rRNA_databases" go
               ;; to the "db" output.
               (for-each (lambda (fasta)
                           (install-file fasta dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    (license license:lgpl3)))
5079
ce7155d5
RW
;; Package definition for the STAR RNA-seq aligner.  The origin snippet
;; strips pre-built binaries and the bundled htslib; a build phase then
;; rewrites the sources to include the htslib headers from the "htslib" input.
(define-public star
  (package
    (name "star")
    (version "2.5.3a")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "013wirlz8lllgjyagl48l75n1isxyabqb3sj7qlsl0x1rmvqw99a"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Refer to "rm" by name instead of by absolute path.
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite from the Depend.list rule.
             ;; Literal dots are escaped so they only match a dot.
             (substitute* "Makefile"
               (("(Depend\\.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled headers into
             ;; angle-bracket includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; Same rewrite for the doubly-nested path in IncludeDefine.h;
             ;; the dot before "h" is escaped like in the pattern above.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("vim" ,vim)))                   ; for xxd
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
de07c0db 5153
dbf4ed7c
RW
;; Package definition for the Subread tool kit.  The build is driven by
;; "Makefile.Linux" from the "src" directory, with compiler flags overridden
;; through make flags.
(define-public subread
  (package
    (name "subread")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0gn5zhbvllks0mmdg3qlmsbg91p2mpdc2wixwfqpi85yzfrh8hcy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               (if (string-prefix? "x86_64" system)
                   (string-append "CCFLAGS=" (string-join (append flags flags64)))
                   (string-append "CCFLAGS=" (string-join flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               ;; The install phase copies everything from "../bin"
               ;; (relative to "src") into the output.
               (copy-recursively "../bin" bin)
               ;; Return #t explicitly rather than relying on the return
               ;; value of copy-recursively.
               #t)))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
5208
d15d981e
RW
;; Package definition for StringTie.  The bundled samtools copy is deleted
;; in the origin snippet and the sources are patched to include the headers
;; under <samtools/...> instead.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                  "stringtie-" version ".tar.gz"))
              (sha256
               (base32
                "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Drop the libbam.a prerequisite from the "stringtie" rule.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Rewrite quoted includes of bam.h, sam.h and kstring.h to
             ;; <samtools/...>.  The dot before "h" is escaped so that it
             ;; matches only a literal dot.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring)\\.h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "stringtie" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
5270
de07c0db
RW
;; Package definition for VCFtools, built from the upstream release tarball.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32
         "10l5c07z9p4i9pr4gl54b2c9h6ndhqlbq1rashg2zcgwkbfrkmvn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       ;; Both PREFIX and MANDIR point into the "out" output.
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
9c38b540 5306
35aa90a1
RW
;; Package definition for Infernal, built with the default phases of the GNU
;; build system.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32
         "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description
     "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
5334
b91cfa22
RW
;; Package definition for the CENTIPEDE R package, fetched from R-Forge.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32
         "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
5358
7b3df1e5
BW
;; Package definition for the "vegan" R package from CRAN.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.4-3")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "vegan" version))
              (sha256
               (base32
                "15zcxfix2d854897k1lr0sfmj2n00339nlsppcr3zrb238lb2mi5"))))
    (build-system r-build-system)
    ;; Build-time only inputs.
    (native-inputs
     `(("gfortran" ,gfortran)
       ("r-knitr" ,r-knitr)))
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ("r-lattice" ,r-lattice)
       ("r-mass" ,r-mass)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (license license:gpl2+)))
5388
8c6de588
RW
;; Package definition for the Bioconductor "annotate" R package.
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "annotate" version))
              (sha256
               (base32
                "03hmbvp3i6lvd307fqdg7akxi2qp322rlky3bzw0zccgm0i0221g"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (home-page "http://bioconductor.org/packages/annotate")
    (synopsis "Annotation for microarrays")
    (description
     "This package provides R environments for the annotation of
microarrays.")
    (license license:artistic2.0)))
5415
07a664cd
RW
;; Package definition for the Bioconductor "geneplotter" R package.
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "geneplotter" version))
              (sha256
               (base32
                "0a0ajns21db5rrjl16bq6wawggsnxr00fg184pc38nmfghv4z4b6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://bioconductor.org/packages/geneplotter")
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (license license:artistic2.0)))
5440
2301fd3e
RW
;; Package definition for the Bioconductor "genefilter" R package.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "0sf2hdi9nv6r83vn1y65m4jiba8pffddpj46d6yjn5rlsixplmqg"))))
    (build-system r-build-system)
    ;; Build-time only input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-survival" ,r-survival)))
    (home-page "http://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
5467
ad34f0ac
RW
;; Package definition for the Bioconductor "DESeq2" R package.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq2" version))
              (sha256
               (base32
                "0m0apn3xi4kdkinsj4xkw5cwysicyjr6xxlxhpa4scyv589am1s5"))))
    ;; The upstream-name property records the mixed-case upstream spelling.
    (properties `((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
5504
e8163773
RW
;; Package definition for the Bioconductor "AnnotationForge" R package.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "01kd86vvgpa4a5zivcy4g6z8rhcykasdskrz8yqsqz211sd1xsr3"))))
    ;; The upstream-name property records the mixed-case upstream spelling.
    (properties `((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
5534
cd9e7dc7
RW
;; Package definition for the Bioconductor "RBGL" R package.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "11db6kvz453ypj9ds3xpjqzwrrjck84ijn4wlhkfyz2dzdgd5ryv"))))
    ;; The upstream-name property records the upper-case upstream spelling.
    (properties `((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (home-page "http://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
5555
ad740ff8
RW
;; Package definition for the Bioconductor "GSEABase" R package.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.38.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "1c6i6g4fj3b8wjyxyygr7i3v8sxrq1ffb2bbicya5ah2gdaclfad"))))
    ;; The upstream-name property records the mixed-case upstream spelling.
    (properties `((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
5582
1a1931f7
RW
;; Package definition for the Bioconductor "Category" R package.
(define-public r-category
  (package
    (name "r-category")
    (version "2.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "0swcmihyjg0fhaaydl9hm24aj9zffw3bibza9y6sqs6jaqd97f09"))))
    ;; The upstream-name property records the capitalized upstream spelling.
    (properties `((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-rsqlite" ,r-rsqlite)))
    (home-page "http://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
5613
89f40c5e
RW
;; Package definition for the Bioconductor "GOstats" R package.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "0qvqjgfnd9ap4rikvyxa9p4dhcnccvkw8phzv88vghh6pq463d62"))))
    ;; The upstream-name property records the mixed-case upstream spelling.
    (properties `((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)))
    (home-page "http://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
5643
cb99d457
RW
;; Package definition for the Bioconductor "ShortRead" R package.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "0ayk3d5625ymb5g2gycq6banzqmyd642xrwjzhdshz2dwid7kly8"))))
    ;; The upstream-name property records the mixed-case upstream spelling.
    (properties `((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "http://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
5685
7f903d73
RW
;; Package definition for the Bioconductor "systemPipeR" R package.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "0c3m5rq63ypv15yca97yag5d4vgd7xj9by2a4sd8z0pcmpajz0hw"))))
    ;; The upstream-name property records the mixed-case upstream spelling.
    (properties `((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
5731
684f29bd
RW
;; Package definition for the Bioconductor "groHMM" R package.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "16k1kp4sbhh0vp7dzywafq52csq42ksqfrqfy4bdv1qbd7536dpd"))))
    ;; The upstream-name property records the mixed-case upstream spelling.
    (properties `((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (license license:gpl3+)))
5758
f3cfe451
RW
;; Package definition for the Bioconductor annotation data package
;; "TxDb.Hsapiens.UCSC.hg19.knownGene".
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "http://bioconductor.org/packages/"
                           "release/data/annotation/src/contrib"
                           "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     `((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments
     `(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (home-page
     "http://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (license license:artistic2.0)))
5790
a2950fa4
BW
;; Package definition for VSEARCH.  The bundled cityhash sources are removed
;; and the "cityhash" input is used instead; the build system files are
;; regenerated with autoreconf before the configure phase.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.4.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hc110ycqpa54nr6x173qg7190hk08qp7yz7zzqxlsypqnpc5zzp"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources.  The vsearch source is adjusted
           ;; for this in the patch.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
5841
07837874
RW
;; Package definition for ParDRe.  There is no configure step and no test
;; suite; the single "ParDRe" executable is installed by hand.
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    (version "1.1.5-1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                                  "1.1.5" ".tar.gz"))
              (sha256
               (base32
                "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "ParDRe"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
5882
e4a44a6a
BW
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments `(#:test-target "spec"))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
5910
9c38b540
PP
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
edb15985 5934
b2bddb07
PP
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    ;; RSpec is a test framework, so it is declared as a native input, just
    ;; like in the other Ruby packages of this file.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
5958
edb15985
PP
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace references to "/bin/sh", "/bin/ls", "which" and
         ;; "/bin/echo" in the test files with the locations returned by
         ;; 'which' at build time.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; The clauses are applied in order, matching the order of the
             ;; separate substitutions they replace.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; Note: the list elements must not be separated by a comma; a
             ;; comma here would be read as an unquote.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
a5002ae7 6001
9fba89e8
RW
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "ACSNMineR")))
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32
         "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (home-page "http://cran.r-project.org/web/packages/ACSNMineR")
    (license license:gpl2+)))
6027
d29b25c4
RW
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.22.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "BiocGenerics")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32
         "0qbmz2qxwwi30xpxpvp2h1h7l494rbbz5d9pls5cfhqdv3wnpzsv"))))
    (build-system r-build-system)
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (home-page "http://bioconductor.org/packages/BiocGenerics")
    (license license:artistic2.0)))
6047
eb24341f
RJ
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.26.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "BiocInstaller")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32
         "0njw2q3lq1rrjx8qzw5d2130l72bmd3g2z8qlxqmkdcbmmgliyj2"))))
    (build-system r-build-system)
    (synopsis "Install Bioconductor packages")
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (home-page "http://bioconductor.org/packages/BiocInstaller")
    (license license:artistic2.0)))
6066
207ce8fb
RJ
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.44.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "biocViews")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32
         "17hi8w0w63f5yc43kid5pbld3ca78sj6n8x9dmkbl8h48818xbga"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-knitr" ,r-knitr)
       ("r-runit" ,r-runit)))
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (home-page "http://bioconductor.org/packages/biocViews")
    (license license:artistic2.0)))
6094
2abfc5b8
RJ
(define-public r-bookdown
  (package
    (name "r-bookdown")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "bookdown" version))
       (sha256
        (base32
         "0r9bchzg7im6psc3jphvshzbidc5bv5xaih1qg7b5518jy4iyvb9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-htmltools" ,r-htmltools)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (synopsis "Authoring books and technical documents with R markdown")
    (description "This package provides output formats and utilities for
authoring books and technical documents with R Markdown.")
    (home-page "https://github.com/rstudio/bookdown")
    (license license:gpl3)))
6116
99df12cd
RJ
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.4.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "BiocStyle")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32
         "1n2c8rj920wmk3q2khmjfnhn5i4b3lmhx1whnghk0zk3jf88hvbi"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (synopsis "Bioconductor formatting styles")
    (description "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (home-page "http://bioconductor.org/packages/BiocStyle")
    (license license:artistic2.0)))
6141
4644644a
RJ
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocCheck" version))
              (sha256
               (base32
                "01zkw5hggzvn0wj4msac71k1mknq4h2inn1c2hwqgw4cy1675wl0"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out        (assoc-ref outputs "out"))
                    (dest-dir   (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the file names are appended below, which avoids "//"
                    ;; in the symlink targets.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script from the R library into <out>/bin.
               (for-each (lambda (script)
                           (symlink (string-append script-dir "/" script)
                                    (string-append dest-dir "/" script)))
                         '("checkBadDeps.R" "BiocCheck"))
               #t))))))
    (native-inputs
     `(("which" ,which)))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)))
    (home-page "http://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
6188
2acaaee5
RJ
(define-public r-getopt
  (package
    (name "r-getopt")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "getopt" version))
              (sha256
               (base32
                "00f57vgnzmg7cz80rjmjz1556xqcmx8nhrlbbhaq4w7gl2ibl87r"))))
    (build-system r-build-system)
    (synopsis "Command-line option processor for R")
    (description
     "This package is designed to be used with Rscript to write shebang
scripts that accept short and long options. Many users will prefer to
use the packages @code{optparse} or @code{argparse} which add extra
features like automatically generated help options and usage texts,
support for default values, positional argument support, etc.")
    (home-page "https://github.com/trevorld/getopt")
    (license license:gpl2+)))
6210
c79ad57a
RJ
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.3.2")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "optparse" version))
              (sha256
               (base32
                "1g8as89r91xxi5j5azsd6vrfrhg84mnfx2683j7pacdp8s33radw"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-getopt" ,r-getopt)))
    (synopsis "Command line option parser")
    (description
     "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (home-page "https://github.com/trevorld/optparse")
    (license license:gpl2+)))
6233
247d498a
RJ
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.50.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DNAcopy" version))
              (sha256
               (base32
                "0112ry62z18m7rdyrn3gvbxq2f6m44cawhcfb1f02z9xzlsj0k28"))))
    (properties
     `((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    ;; The Fortran compiler is a build-time tool, so it is declared as a
    ;; native input rather than a regular input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
6255
7485129e
RW
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.14.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "S4Vectors")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32
         "0ywwrs4d752xfk0p0w122kvi0xvp6nmxnyynchbsa8zciqymhgv8"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (home-page "http://bioconductor.org/packages/S4Vectors")
    (license license:artistic2.0)))
6282
274da826
RW
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.3-6")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqinr" version))
              (sha256
               (base32
                "13d0qxm2244wgdl2dy2s8vnrnf5fx4n47if9gkb49dqx6c0sx8s2"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (synopsis "Biological sequences retrieval and analysis")
    (description
     "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (home-page "http://seqinr.r-forge.r-project.org/")
    (license license:gpl2+)))
6307
78addcb0
RW
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.10.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "IRanges")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32
         "0zp4mxm9h1p4krj7m7cinkvwa2ibqkq59jwpan97yvhb4z8q0d6n"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (home-page "http://bioconductor.org/packages/IRanges")
    (license license:artistic2.0)))
6336
ffef27f3
RJ
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "0.99.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; an annotation data package, which Bioconductor hosts under
              ;; "data/annotation/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "data/annotation/src/contrib/"
                                  "GenomeInfoDbData_" version ".tar.gz"))
              (sha256
               (base32
                "120qvhb0pvkzd65lsgja62vyrgc37si6fh68q4cg4w5x9f04jw25"))))
    (properties
     `((upstream-name . "GenomeInfoDbData")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
6355
bf7764b7
RW
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.12.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "GenomeInfoDb")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32
         "1bwwhscjl376a5p43mx8ijrqajxmgypbqhv049pgagl22hkkf0y3"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (home-page "http://bioconductor.org/packages/GenomeInfoDb")
    (license license:artistic2.0)))
6383
744004a3
RJ
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "02l17i6xh33dv10swwvyfxrmv5kp23iv278iwvkfq2mnipasfnb9"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
6410
b669d9c4
RJ
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.22.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "VariantAnnotation")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32
         "05hpm4as36kvpiqhgnkfjwfx0a05p304c21ggba29iac4nanm8b3"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Package for annotation of genetic variants")
    (description "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (license license:artistic2.0)))
6448
7d4224d7
RJ
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32
         "0q7rqm86nwq0rg4fjggfr7xqybjrxj425vni3cva70b4c8d1h425"))))
    (build-system r-build-system)
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (home-page "http://bioinf.wehi.edu.au/limma")
    (license license:gpl2+)))
6467
0e7d5560
RW
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "01n09f4jdm60684lzikp02zf9gjan8bdrjx740vggr21q9fa69wn"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata;
         ;; zlib is provided as an input below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/XVector")
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
6503
e2cd1d0f
RW
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "10x9zx0b7j05d1j6p0xs4q4f4wzbhf3rq64wzi9cgv7f44q43a5n"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, which play a central role when it comes
to analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
6532
555e3399
RW
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.36.0")
    ;; The upstream name is capitalized, unlike the package name.
    (properties `((upstream-name . "Biobase")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32
         "0x7pf5xsdcj12dbf5qqki2c6bd5madqg2fbiq5xgisarpc9v6c1m"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (home-page "http://bioconductor.org/packages/Biobase")
    (license license:artistic2.0)))
6554
8b7bce74
RW
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.38.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "AnnotationDbi")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32
         "1xffm98s817mfc827cnr0by6167nlrl1glxzjawzz0rkghs41g27"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (home-page "http://bioconductor.org/packages/AnnotationDbi")
    (license license:artistic2.0)))
6581
c465fa72
RW
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "0knkxh23vl9pa0by03xr6dy9aiah714cmf54jl828k51l9wv5l2j"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL as its first argument and the displayed
    ;; text as its second.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
6611
e91d362e
RW
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.10.0")
    ;; The upstream name uses mixed case, unlike the package name.
    (properties `((upstream-name . "BiocParallel")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32
         "01ph0kq70b5gkd7n6a4myjlvwzgc0hi4xfwz8h17h06n9p5sdwa9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)))
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (home-page "http://bioconductor.org/packages/BiocParallel")
    (license (list license:gpl2+ license:gpl3+))))
6635
bf159353
RW
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.44.0")
    ;; The upstream name is capitalized, unlike the package name.
    (properties `((upstream-name . "Biostrings")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32
         "0ixgx12cx2z4n2khxq83crz9gc21qckj2v78y2p31567kfsw7clg"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (home-page "http://bioconductor.org/packages/Biostrings")
    (license license:artistic2.0)))
6661
f8d74f70
RW
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.28.0")
    ;; The upstream name is capitalized, unlike the package name.
    (properties `((upstream-name . "Rsamtools")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32
         "1zx6vhdz3lksbfy98jj3qzl7cxgspigm2pbsd5835b12r6mc5w6d"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata;
         ;; zlib is provided as an input above.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (home-page "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (license license:expat)))
6705
71e34e6b
RJ
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DelayedArray" version))
              (sha256
               (base32
                "0pcsk0f2dg2ldzprs1cccqrk53jrysmm6ccgjj5wh6z3x17g7g2r"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
6735
6e76dda2
RW
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "SummarizedExperiment" version))
              (sha256
               (base32
                "1szjbzzz4pyip891nji71caalxh0rhqiv7rpv6q54swlrqpfkqkw"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
6766
d8a828af
RW
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicAlignments" version))
              (sha256
               (base32
                "1aagyrdk5309a7awg42lg0bpirp91i6i2ddvpmrs38pzriwahnjy"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
6799
317755ff
RW
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rtracklayer" version))
              (sha256
               (base32
                "0dv7p3wzmx57inznf6fb06417zcm48g7fpazyahxny7bqgzwq0ig"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove every mention of zlibbioc from the package metadata; as
         ;; the phase name says, the zlib listed in inputs is meant to be
         ;; used instead.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
6844
2fd7c049
RW
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicFeatures" version))
              (sha256
               (base32
                "1pjxlr34ygv8pvfwpyq268wpgqzphiwpij85fyhjqdwdp0a253ik"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
6885
fb25d880
RW
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/GO.db_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "02cj8kqi5w39jwcs8gp1dgj08sah262ppxnkz4h3qd0w191y8yyl"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
6909
d1dbde6a
RW
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "graph" version))
              (sha256
               (base32
                "0hx9wslnrci7c5i1gd1zlpjmgrkdnx9gabfgmzzwfnykk1mdvvna"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description
     "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
6928
d547ce5e
RW
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "topGO" version))
              (sha256
               (base32
                "04kvxz9qsxfz0np7wlrzp4r95nykz94x7mqpyyk76f90g6m66vcc"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
6961
c63cef66
RW
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BSgenome" version))
              (sha256
               (base32
                "18dlknwk3xvi52hamxf9pl5bjc5806mpw98xwvx5xajn8mrxiy5h"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
6991
aa3eeeb5
RJ
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Hsapiens.1000genomes.hs37d5_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (license license:artistic2.0)))
7022
c43a011d
RW
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.50.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "impute" version))
              (sha256
               (base32
                "0va45vfixy3np549md87h3b3rbavm54gfnmnjhpr9hf02lll6zxs"))))
    (build-system r-build-system)
    ;; gfortran is a compiler, i.e. a tool that runs on the build machine, so
    ;; it is declared as a native input rather than a regular input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
7042
03ea5a35
RW
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqPattern" version))
              (sha256
               (base32
                "0lsa5pz36xapi3yiv78k3z286a5md5sm5g21pgfyg8zmhmkxr7y8"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
7069
cb933df6
RW
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genomation" version))
              (sha256
               (base32
                "1m4mz7wihj8yqivwkzw68div8ybk4rjsai3ffki7xp7sh21ax03y"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    ;; The wording avoids the redundant "This package provides a package".
    (description
     "This package provides tools for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
7118
64efa307
RW
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/experiment/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "release/data/experiment/src/contrib/"
                                  "genomationData_" version ".tar.gz"))
              (sha256
               (base32
                "16dqwb7wx1igx77zdbcskx5m1hs4g4gp2hl56zzm70hcagnlkz8y"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are ChIP-seq, methylation and CAGE data,
downloaded from ENCODE.")
    (license license:gpl3+)))
7146
486da491
RW
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Hs.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "19mg64pw8zcvb9yxzzyf7caz1kvdrkfsj1hd84bzq7crrh8kc4y6"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "org.Hs.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome wide annotation for Human")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
    (license license:artistic2.0)))
7172
fefedf98
RW
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Ce.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "12llfzrrc09kj2wzbisdspv38qzkzgpsbn8kv7qkwg746k3pq436"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "org.Ce.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Ce.eg.db/")
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (license license:artistic2.0)))
7198
16c53a1e
RW
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Dm.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "1vzbphbrh1cf7xi5cksia9xy9a9l42js2z2qsajvjxvddiphrb7j"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "org.Dm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Dm.eg.db/")
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (license license:artistic2.0)))
7224
e761beb9
RW
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "org.Mm.eg.db_" version ".tar.gz"))
              (sha256
               (base32
                "1lykjqjaf01fmgg3cvfcvwd5xjq6zc5vbxnm5r4l32fzvl89q50c"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "org.Mm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Mm.eg.db/")
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (license license:artistic2.0)))
7250
936e7d67
RW
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqLogo" version))
       (sha256
        (base32
         "18bajdl75h3039559d81rgllqqvnq8ygsfxfx081xphxs0v6xggy"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties `((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
7271
c90a4baf
RW
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Hsapiens.UCSC.hg19_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (license license:artistic2.0)))
7302
a3e90287
RW
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Mmusculus.UCSC.mm9_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (license license:artistic2.0)))
7333
4714d521
RW
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Mmusculus.UCSC.mm10_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (license license:artistic2.0)))
7365
c5173d74
RJ
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (license license:artistic2.0)))
7400
943bd627
RW
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Celegans.UCSC.ce6_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
7432
fc47c7d6
RW
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Celegans.UCSC.ce10_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
7464
6dc60998
RW
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Dmelanogaster.UCSC.dm3_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    ;; Record the upstream spelling, which differs from the Guix name.
    (properties
     `((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (license license:artistic2.0)))
7496
ae2462f7
RW
;; Bioconductor package for discriminative motif discovery.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32
         "1pa97aj6c5f3gx4bgriw110764dj3m9h104ddi8rv2bpy41yd98d"))))
    ;; The upstream name is mixed-case, so record it explicitly.
    (properties `((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ;; The label follows the Guix package name (dashes, not the dots of
       ;; the upstream name), matching how the same input is labelled in
       ;; r-rcas.
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
7523
a5002ae7
AE
;; CRAN package for QTL analysis of experimental crosses.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.40-8")
    (source
     (origin
       (method url-fetch)
       ;; Use cran-uri like the other CRAN packages in this file instead of
       ;; a hand-built mirror URL.
       ;; NOTE(review): cran-uri is expected to also list the CRAN
       ;; "Archive" location for superseded releases; confirm against
       ;; (guix build-system r).
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "05bj1x2ry0i7yqiydlswb3d2h4pxg70z8w1072az1mrv1m54k8sp"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
d1e32822 7548
9e3ba31c
RJ
;; Bioconductor package bundling the zlib sources for other R packages.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "0hbk90q5hl0fycfvy5nxxa4hxgglag9lzp7i0fg849bqygg5nbyq"))))
    (properties '((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
7567
e619a5c2
RW
;; R4RNA is distributed from the project's own site rather than from CRAN
;; or Bioconductor, hence the explicit download URL.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.e-rna.org/r-chie/files/R4RNA_"
                    version ".tar.gz"))
              (sha256
               (base32
                "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
7590
52765a63
RW
;; Bioconductor package exposing the HTSlib C library to other R packages.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "1vk3ng61dhi3pbia1lp3gl3mlr3i1vb2lkq83qb53i9dzz128wh9"))))
    (properties '((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    ;; Build-time only.
    (native-inputs
     `(("autoconf" ,autoconf)))
    (inputs
     `(("zlib" ,zlib)))
    ;; Needed by dependents at run time, hence propagated.
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
7617
fe02c4c9
RW
;; Bioconductor package computing read-count signals from indexed BAM files.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "1k42gvk5mgq4la1fp0in3an2zfdz69h6522jsqhmk0f6i75kg4mb"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "http://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
7647
89984be4
RW
;; R library of the RNA-centric annotation system, built from the GitHub
;; release archive.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/RCAS/archive/v"
                           version ".tar.gz"))
       ;; Give the archive a name that identifies the package and version.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1hd0r66556bxbdd82ksjklq7nfli36l4k6y88ic7kkg9873wa1nw"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("r-testthat" ,r-testthat)
       ;; During vignette building knitr checks that "pandoc-citeproc"
       ;; is in the PATH.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)))
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-biomart" ,r-biomart)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-topgo" ,r-topgo)
       ("r-dt" ,r-dt)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-motifrg" ,r-motifrg)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:expat)))
7694
50937297
RW
;; Guile web front-end for RCAS.  The installed launcher is wrapped so that
;; it runs with fixed Guile load paths and the build-time R_LIBS_SITE.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0d3my0g8i7js59n184zzzjdki7hgmhpi4rhfvk7i6jsw01ba04qq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((json-dir  (assoc-ref inputs "guile-json"))
                    (redis-dir (assoc-ref inputs "guile-redis"))
                    ;; Colon-separated module directories of the two Guile
                    ;; libraries; used for both load-path variables below.
                    (load-path (string-append
                                json-dir "/share/guile/site/2.2:"
                                redis-dir "/share/guile/site/2.2"))
                    (program   (string-append (assoc-ref outputs "out")
                                              "/bin/rcas-web")))
               ;; "=" overrides any value inherited from the caller's
               ;; environment rather than extending it.
               (wrap-program program
                 `("GUILE_LOAD_PATH" ":" = (,load-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,load-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile2.2-json)
       ("guile-redis" ,guile2.2-redis)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
7738
7500e42b
RJ
;; Bioconductor package for characterizing mutational patterns.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.0.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "1a3c2bm0xx0q4gf98jiw74msmdf2fr8rbsdysd5ww9kqlzmsbr17"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
7771
d7160529
RW
;; CRAN package for weighted correlation network analysis.
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.51")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "WGCNA" version))
              (sha256
               (base32
                "0hzvnhw76vwg8bl8x368f0c5szpwb8323bmrb3bir93i5bmfjsxx"))))
    (properties '((upstream-name . "WGCNA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-doparallel" ,r-doparallel)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-fastcluster" ,r-fastcluster)
       ("r-foreach" ,r-foreach)
       ("r-go-db" ,r-go-db)
       ("r-hmisc" ,r-hmisc)
       ("r-impute" ,r-impute)
       ("r-matrixstats" ,r-matrixstats)
       ("r-preprocesscore" ,r-preprocesscore)))
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (license license:gpl2+)))
7807
c827f202
RW
;; ChIPKernels is built from a pinned git commit; the version string encodes
;; the Guix revision and the first nine characters of the commit hash.
(define-public r-chipkernels
  (let ((revision "1")
        (commit "c9cfcacb626b1221094fb3490ea7bac0fd625372"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ManuSetty/ChIPKernels.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
7843
2d9fb170
RW
;; SeqGL is fetched as a GitHub archive of the tagged release.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ManuSetty/SeqGL/"
                                  "archive/" version ".tar.gz"))
              ;; Give the archive a name that identifies the package.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
7873
bd3be46e
RW
;; CRAN package for gapped-kmer SVM classifiers.
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.71.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gkmSVM" version))
              (sha256
               (base32
                "1zpxgxmf2nd5j5wn00ps6kfxr8wxh7d1swr1rr4spq7sj5z5z0k0"))))
    (properties '((upstream-name . "gkmSVM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    (home-page "http://cran.r-project.org/web/packages/gkmSVM")
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (license license:gpl2+)))
7906
d4af25b5
RJPB
;; Bioconductor package for importing transcript-level estimates.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32 "1k5a7dad6zqg936s17f6cmwgqp11x24z9zhxndsgwbscgpyhpcb0"))))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
7928
69f2b3bd
RJPB
;; Bioconductor interface to HDF5.  The tarball bundles a trimmed copy of
;; the HDF5 sources, which is unpacked by a custom phase so that the
;; matching unpack steps can be removed from the package's own Makevars.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5" version))
              (sha256
               (base32
                "0pb04li55ysag30s7rap7nnivc0rqmgsmpj43kin0rxdabfn1w0k"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-smallhdf5
           (lambda _
             ;; Fail the phase right away when tar exits with a non-zero
             ;; status; otherwise the build would only break later, in the
             ;; substitutions or while compiling, with a less helpful error.
             (and (zero? (system* "tar" "-xzvf"
                                  "src/hdf5source/hdf5small.tgz" "-C" "src/"))
                  (begin
                    ;; The bundled sources are already unpacked, so drop the
                    ;; Makevars lines that would unpack and move them.
                    (substitute* "src/Makevars"
                      (("^.*cd hdf5source &&.*$") "")
                      (("^.*gunzip -dc hdf5small.tgz.*$") "")
                      (("^.*rm -rf hdf5.*$") "")
                      (("^.*mv hdf5source/hdf5 ..*$") ""))
                    ;; /bin/mv does not exist in the build environment.
                    (substitute* "src/hdf5/configure"
                      (("/bin/mv") "mv"))
                    #t)))))))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
7971
66e40e00
RW
;; EMBOSS is built with the GNU build system after regenerating the
;; autotools files, because two phases patch configure.in and Makefile.am.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER tests check for the presence of libgd, libpng
             ;; and zlib, but assume that they are all found at the same
             ;; prefix.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'unpack 'disable-update-check
           (lambda _
             ;; At build time there is no connection to the Internet, so
             ;; looking for updates will not work.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate configure and the Makefile.in files from the inputs
         ;; patched above.
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))
       #:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8034
1f1b20b8
RW
;; BITS is built from a pinned git commit with a plain Makefile: there is no
;; configure script and no install target, so the built "bin" directory is
;; copied into the output by hand.
(define-public bits
  (let ((commit "3cc4567896d9d6442923da944beb704750a08d2d")
        (revision "1"))
    (package
      (name "bits")
      ;; The version is 2.13.0 even though no release archives have been
      ;; published as yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Drop every Makefile line mentioning a "_cuda" target and
               ;; remove the line continuation after the entry that
               ;; preceded them.
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ target) target))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (copy-recursively "bin" bin))
               #t)))
         #:tests? #f))                  ;no tests included
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8080
(define-public piranha
  ;; There is no release tarball for the latest version.  The latest commit is
  ;; older than one year at the time of this writing.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/piranha.git")
                      (commit commit)))
                ;; Name the checkout after the package and version, as the
                ;; other git-fetch origins in this file do, so that the
                ;; store item is recognizable.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy the separately fetched smithlab_cpp sources (the
           ;; "smithlab-cpp" input) into ./src/smithlab_cpp/.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (for-each (lambda (file)
                           (install-file file "./src/smithlab_cpp/"))
                         (find-files (assoc-ref inputs "smithlab-cpp")))
               #t))
           ;; Copy everything found under "bin" in the build tree into the
           ;; output's bin directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files "bin" ".*")))
               #t)))
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools") "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
e62ffce5 8146
d1e32822
RW
;; PePr is a Python 2-only ChIP-Seq peak caller distributed on PyPI.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Use the pypi-uri helper, as multiqc does, instead of
              ;; spelling out the PyPI download URL by hand.  The upstream
              ;; project name is mixed-case, hence "PePr".
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
6b49a37e
RJ
8175
;; filevercmp has no releases; it is built from the GitHub archive of a
;; pinned commit, and the version string embeds the first seven characters
;; of that commit.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Return #t explicitly, like the other custom phases in
                 ;; this file, instead of whatever install-file returns.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
5fb5dffb
RW
8203
;; MultiQC is fetched from PyPI and patched with
;; "multiqc-fix-git-subprocess-error.patch".
(define-public multiqc
  (package
    (name "multiqc")
    (version "0.9")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32
                "12gs1jw2jrxrij529rnl5kaqxfcqn15yzcsggxkfhdx634ml0cny"))
              (patches
               (search-patches "multiqc-fix-git-subprocess-error.patch"))))
    (build-system python-build-system)
    ;; Tests are to be introduced in the next version, see
    ;; https://github.com/ewels/MultiQC/issues/376
    (arguments '(#:tests? #f))
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (home-page "http://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3)))
6193563a
RW
8237
;; Bioconductor package for processing ChIPseq short-read data.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "115ayp82rs99iaswrx45skw1i5iacgwzz5k8rzijbp5qic0554n0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "http://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
d407bdb9
RW
8262
;; Experiment-data package with the helper files used by CopywriteR.  It is
;; downloaded from Bioconductor's "data/experiment" area, so the URL is
;; spelled out rather than built with bioconductor-uri.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://bioconductor.org/packages/release/"
                                  "data/experiment/src/contrib/CopyhelpeR_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties '((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
3a3bf2f8
RW
8288
;; Bioconductor package deriving copy number information from off-target
;; reads of targeted sequencing.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "1bwwnsyk7cpgwkagsnn5mv6fv233b0rkhjvbadrh70h8m4anawfj"))))
    (properties '((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
dd42a330
RJPB
8328
;; Surrogate variable analysis (sva) from Bioconductor.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "sva" version))
              (sha256
               (base32
                "1wc1fjm6dzlsqqagm43y57w8jh8nsh0r0m8z1p6ximcb5gxqh7hn"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)))
    ;; Use the HTTPS home page rather than the plain-HTTP one.
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiments. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
fb1e528e
RW
8355
;; seqminer from CRAN; zlib is its only declared non-R input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "5.9")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqminer" version))
              (sha256
               (base32
                "0sfkxrc9gy5a8fadzyzfzh7l5grasm8cj6cd2nnpv85ws6mqr6qd"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Any version of the GPL is acceptable
    (license (list license:gpl2+ license:gpl3+))))
1f40e73c
RW
8377
;; RareMETALS2 is fetched as a tarball from the author's wiki rather than from
;; CRAN or Bioconductor, hence the hand-built URI.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                  "b/b7/RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
0e7d058e
RW
8409
;; MALDIquant from CRAN.  It declares no inputs of its own; the mixed-case
;; upstream name is kept in PROPERTIES.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.16.2")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "MALDIquant" version))
              (sha256
               (base32
                "0z5srzsfgsgi4bssr4chls4ry6d18y2g9143znqmraylppwrrqzr"))))
    (properties `((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    ;; Use the HTTPS home page rather than the plain-HTTP one.
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
ae262174
RW
8435
;; ProtGenerics from Bioconductor; it declares no inputs.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "0hb3vrrvfx6lcfalmjxm8dmigfmi5nba0pzjfgsrzd35c8mbfc6f"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
71676a1f
RW
8455
;; mzR from Bioconductor.  netcdf is a regular input; the R dependencies are
;; propagated.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.8.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzR" version))
              (sha256
               (base32
                "0ipmhg6l3pf648rdx5g2ha7l5ppd3cja6afxhdw76x8ga3633x0r"))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (inputs
     `(("netcdf" ,netcdf)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
5c9d1505
RW
8487
;; affyio from Bioconductor.  It lists both the R package r-zlibbioc
;; (propagated) and the system zlib (regular input).
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "1svsl4mpk06xm505pap913x69ywks99262krag8y4ygpllj7dfyy"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
05c7e5fb
RW
8511
;; affy from Bioconductor; all of its dependencies are propagated R packages.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "1snq71ligf0wvaxa6zfrl13ydw0zfhspmhdyfk8q3ba3np4cz344"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    ;; Use the HTTPS home page rather than the plain-HTTP one.
    (home-page "https://bioconductor.org/packages/affy")
    ;; "Affymetrix" is a proper noun, capitalised as in r-affyio's synopsis.
    (synopsis "Methods for Affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
7c08afaf
RW
8537
;; vsn (variance stabilization and normalization) from Bioconductor.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.42.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "vsn" version))
              (sha256
               (base32
                "0mgl0azys2g90simf8wx6jdwd7gyg3m4pf12n6w6507jixm2cg97"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    ;; Use the HTTPS home page rather than the plain-HTTP one.
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
4aa7d592
RW
8571
;; mzID from Bioconductor; the mixed-case upstream name is kept in PROPERTIES.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzID" version))
              (sha256
               (base32
                "1zn896cpfvqp1qmq5c4vcj933hb8rxwb6gkck1wqvr7393rpqy1q"))))
    (properties `((upstream-name . "mzID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    ;; Use the HTTPS home page rather than the plain-HTTP one.
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
6a67e181
RW
8601
;; pcaMethods from Bioconductor; the mixed-case upstream name is kept in
;; PROPERTIES.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.66.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pcaMethods" version))
              (sha256
               (base32
                "18mawhxw57pgpn87qha4mwki24gqja7wpqha8q496476vyap11xw"))))
    (properties `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
11879284
RW
8631
;; MSnbase from Bioconductor.  It propagates a long list of R packages,
;; several of which (r-affy, r-maldiquant, r-mzid, r-mzr, r-pcamethods,
;; r-protgenerics, r-vsn) are defined in this same file.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.0.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnbase" version))
              (sha256
               (base32
                "0jjjs29dcwsjaxzfqxy98ycpg3rwxzzchkj77my3cjgdc00sm66n"))))
    (properties `((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
66da3a9f
RW
8672
;; MSnID from Bioconductor; the mixed-case upstream name is kept in
;; PROPERTIES.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnID" version))
              (sha256
               (base32
                "0fkk3za39cxi0jyxmagmycjdslr2xf6vg3ylz14jyffqi0blw9d5"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    ;; Use the HTTPS home page rather than the plain-HTTP one.
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
5ded35d8 8711
2441c284
RJPB
(define-public r-seurat
  ;; Source releases are only made for new x.0 versions.  All newer versions
  ;; are only released as pre-built binaries.  At the time of this writing the
  ;; latest binary release is 1.4.0.12, which is equivalent to this commit.
  (let ((commit "fccb77d1452c35ee47e47ebf8e87bddb59f3b08d")
        (revision "1"))
    (package
      (name "r-seurat")
      ;; Version is "<binary release>-<revision>.<first 7 chars of commit>".
      (version (string-append "1.4.0.12-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/satijalab/seurat")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "101wq3aqrdmbfi3lqmq4iivk9iwbf10d4z216ss25hf7n9091cyl"))
                ;; Delete pre-built jar.
                (snippet
                 '(begin (delete-file "inst/java/ModularityOptimizer.jar")
                         #t))))
      (build-system r-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Rebuild the jar that the source snippet removed: compile every
           ;; .java file under "java" into a scratch directory, then pack
           ;; that directory into inst/java/ModularityOptimizer.jar.
           (add-after 'unpack 'build-jar
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((classesdir "tmp-classes"))
                 (setenv "JAVA_HOME" (assoc-ref inputs "jdk"))
                 (mkdir classesdir)
                 ;; The phase succeeds only if both javac and jar exit with 0.
                 (and (zero? (apply system* `("javac" "-d" ,classesdir
                                              ,@(find-files "java" "\\.java$"))))
                      (zero? (system* "jar"
                                      "-cf" "inst/java/ModularityOptimizer.jar"
                                      "-C" classesdir ".")))))))))
      ;; Only the "jdk" output of icedtea is needed, and only at build time.
      (native-inputs
       `(("jdk" ,icedtea "jdk")))
      (propagated-inputs
       `(("r-ape" ,r-ape)
         ("r-caret" ,r-caret)
         ("r-cowplot" ,r-cowplot)
         ("r-dplyr" ,r-dplyr)
         ("r-fastica" ,r-fastica)
         ("r-fnn" ,r-fnn)
         ("r-fpc" ,r-fpc)
         ("r-gdata" ,r-gdata)
         ("r-ggplot2" ,r-ggplot2)
         ("r-gplots" ,r-gplots)
         ("r-gridextra" ,r-gridextra)
         ("r-igraph" ,r-igraph)
         ("r-irlba" ,r-irlba)
         ("r-lars" ,r-lars)
         ("r-mixtools" ,r-mixtools)
         ("r-pbapply" ,r-pbapply)
         ("r-plyr" ,r-plyr)
         ("r-ranger" ,r-ranger)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-rcpp" ,r-rcpp)
         ("r-rcppeigen" ,r-rcppeigen)
         ("r-rcppprogress" ,r-rcppprogress)
         ("r-reshape2" ,r-reshape2)
         ("r-rocr" ,r-rocr)
         ("r-rtsne" ,r-rtsne)
         ("r-stringr" ,r-stringr)
         ("r-tclust" ,r-tclust)
         ("r-tsne" ,r-tsne)
         ("r-vgam" ,r-vgam)))
      (home-page "http://www.satijalab.org/seurat")
      ;; A synopsis should be a short noun phrase that does not start with
      ;; the package name.
      (synopsis "R toolkit for single cell genomics")
      (description
       "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
      (license license:gpl3))))
8790
5ded35d8
RW
;; Private (non-exported) htslib variant used as an input of sambamba.  It
;; inherits from htslib but takes its source from a pinned commit of the
;; lomereiter fork.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/lomereiter/htslib.git")
                      (commit commit)))
                (file-name (string-append "htslib-" version "-checkout"))
                (sha256
                 (base32
                  "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      (arguments
       (substitute-keyword-arguments (package-arguments htslib)
         ((#:phases phases)
          `(modify-phases ,phases
             ;; Run autoreconf before the configure phase; the phase
             ;; succeeds only if autoreconf exits with 0.
             (add-before 'configure 'bootstrap
               (lambda _
                 (zero? (system* "autoreconf" "-vif"))))))))
      ;; autoconf and automake are added on top of htslib's native inputs
      ;; for the bootstrap phase above.
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
8818
;; Sambamba, built with the LDC D compiler against a pinned BioD checkout and
;; the htslib-for-sambamba and lz4 inputs.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lomereiter/sambamba/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17076gijd65a3f07zns2gvbgahiz5lriwsa6dq353ss3jl85d8vy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:make-flags
       '("D_COMPILER=ldc2"
         ;; Override "--compiler" flag only.
         "D_FLAGS=--compiler=ldc2 -IBioD -g -d"
         "sambamba-ldmd2-64")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the BioD checkout into the source tree as "BioD", which is
         ;; the directory the -IBioD flag above points at.
         (add-after 'unpack 'place-biod
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             #t))
         ;; Drop the htslib-static and lz4-static entries from the Makefile.
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             (substitute* "Makefile"
               ((" htslib-static lz4-static") ""))
             #t))
         ;; Install the built binary by hand.  install-file creates the
         ;; target directory itself, so no separate mkdir-p is needed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "build/sambamba" bin)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ;; BioD is taken as a bare source checkout pinned to one commit.
       ("biod"
        ,(let ((commit "1248586b54af4bd4dfb28ebfebfc6bf012e7a587"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1m8hi1n7x0ri4l6s9i0x6jg4z4v94xrfdzp7mbizdipfag0m17g3")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    ;; Use the HTTPS home page rather than the plain-HTTP one.
    (home-page "https://lomereiter.github.io/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))