Merge branch 'master' into core-updates
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;;
14 ;;; This file is part of GNU Guix.
15 ;;;
16 ;;; GNU Guix is free software; you can redistribute it and/or modify it
17 ;;; under the terms of the GNU General Public License as published by
18 ;;; the Free Software Foundation; either version 3 of the License, or (at
19 ;;; your option) any later version.
20 ;;;
21 ;;; GNU Guix is distributed in the hope that it will be useful, but
22 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 ;;; GNU General Public License for more details.
25 ;;;
26 ;;; You should have received a copy of the GNU General Public License
27 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
28
29 (define-module (gnu packages bioinformatics)
30 #:use-module ((guix licenses) #:prefix license:)
31 #:use-module (guix packages)
32 #:use-module (guix utils)
33 #:use-module (guix download)
34 #:use-module (guix git-download)
35 #:use-module (guix hg-download)
36 #:use-module (guix build-system ant)
37 #:use-module (guix build-system gnu)
38 #:use-module (guix build-system cmake)
39 #:use-module (guix build-system ocaml)
40 #:use-module (guix build-system perl)
41 #:use-module (guix build-system python)
42 #:use-module (guix build-system r)
43 #:use-module (guix build-system ruby)
44 #:use-module (guix build-system scons)
45 #:use-module (guix build-system trivial)
46 #:use-module (gnu packages)
47 #:use-module (gnu packages autotools)
48 #:use-module (gnu packages algebra)
49 #:use-module (gnu packages base)
50 #:use-module (gnu packages bash)
51 #:use-module (gnu packages bison)
52 #:use-module (gnu packages boost)
53 #:use-module (gnu packages check)
54 #:use-module (gnu packages compression)
55 #:use-module (gnu packages cpio)
56 #:use-module (gnu packages cran)
57 #:use-module (gnu packages curl)
58 #:use-module (gnu packages documentation)
59 #:use-module (gnu packages databases)
60 #:use-module (gnu packages datastructures)
61 #:use-module (gnu packages file)
62 #:use-module (gnu packages flex)
63 #:use-module (gnu packages gawk)
64 #:use-module (gnu packages gcc)
65 #:use-module (gnu packages gd)
66 #:use-module (gnu packages gtk)
67 #:use-module (gnu packages glib)
68 #:use-module (gnu packages graph)
69 #:use-module (gnu packages groff)
70 #:use-module (gnu packages guile)
71 #:use-module (gnu packages haskell)
72 #:use-module (gnu packages image)
73 #:use-module (gnu packages imagemagick)
74 #:use-module (gnu packages java)
75 #:use-module (gnu packages jemalloc)
76 #:use-module (gnu packages ldc)
77 #:use-module (gnu packages linux)
78 #:use-module (gnu packages logging)
79 #:use-module (gnu packages machine-learning)
80 #:use-module (gnu packages man)
81 #:use-module (gnu packages maths)
82 #:use-module (gnu packages mpi)
83 #:use-module (gnu packages ncurses)
84 #:use-module (gnu packages ocaml)
85 #:use-module (gnu packages pcre)
86 #:use-module (gnu packages parallel)
87 #:use-module (gnu packages pdf)
88 #:use-module (gnu packages perl)
89 #:use-module (gnu packages perl-check)
90 #:use-module (gnu packages pkg-config)
91 #:use-module (gnu packages popt)
92 #:use-module (gnu packages protobuf)
93 #:use-module (gnu packages python)
94 #:use-module (gnu packages python-web)
95 #:use-module (gnu packages readline)
96 #:use-module (gnu packages ruby)
97 #:use-module (gnu packages serialization)
98 #:use-module (gnu packages shells)
99 #:use-module (gnu packages statistics)
100 #:use-module (gnu packages swig)
101 #:use-module (gnu packages tbb)
102 #:use-module (gnu packages tex)
103 #:use-module (gnu packages texinfo)
104 #:use-module (gnu packages textutils)
105 #:use-module (gnu packages time)
106 #:use-module (gnu packages tls)
107 #:use-module (gnu packages vim)
108 #:use-module (gnu packages web)
109 #:use-module (gnu packages xml)
110 #:use-module (gnu packages xorg)
111 #:use-module (srfi srfi-1)
112 #:use-module (ice-9 match))
113
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32
         "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; Skip 'configure' and compile the single versioned C source
         ;; file by hand.
         (delete 'configure)
         (replace 'build
           (lambda _
             (let ((c-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc" "-O3" "-ffast-math"
                               "-finline-functions"
                               "-o" "aragorn" c-file)))))
         ;; Install the binary and its man page into the "out" output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               ;; 'install-file' creates the target directory itself.
               (install-file "aragorn" (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1"))
               #t))))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
159
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source
     (origin
       (method url-fetch)
       ;; BamM is not available on pypi.
       (uri (string-append
             "https://github.com/Ecogenomics/BamM/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "c/htslib-1.3.1")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it
       ;; has been modified from its original form.
       #:configure-flags
       (let ((hts-prefix (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib"
               (string-append hts-prefix "/lib")
               "--with-libhts-inc"
               (string-append hts-prefix "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((shell (which "sh")))
                 ;; Use autogen so that 'configure' works.  Both scripts
                 ;; get their "/bin/sh" references replaced by the "sh"
                 ;; found on PATH.
                 (substitute* '("autogen.sh" "configure")
                   (("/bin/sh") shell))
                 (setenv "CONFIG_SHELL" shell)
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         ;; Wrap 'bamm' so that the build-time PATH is prefixed to PATH
         ;; whenever the program runs.
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (wrap-program (string-append (assoc-ref outputs "out")
                                          "/bin/bamm")
               `("PATH" ":" prefix (,(getenv "PATH"))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (python (assoc-ref inputs "python"))
                    ;; First three of the last five characters of the
                    ;; python store path; presumably the "X.Y" part of a
                    ;; trailing "X.Y.Z" version -- TODO confirm.
                    (python-version
                     (string-take (string-take-right python 5) 3)))
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               (setenv "PYTHONPATH"
                       (string-append out "/lib/python" python-version
                                      "/site-packages:"
                                      (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (zero? (system* "nosetests"))))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)    ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
251
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1z3kg24qrwq13a88n9d86izngrar4fll7gr6phddb2faw75pdgaa"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Add the package's own lib/bamtools directory to the linker
         ;; rpath before 'configure' runs.
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' returns an unspecified value; phases must
             ;; return #t to signal success.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
284
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/bcftools/releases/download/"
             version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32
         "1vgw2mwngq20c530zim52zvgmw1lci8rzl33pvh44xqk3xlzvjsa"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.8")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags (list "--with-htslib=system")
       #:make-flags
       ;; Every HTS-related variable points into the "htslib" input.
       (let ((hts-prefix (assoc-ref %build-inputs "htslib")))
         (list "USE_GPL=1"
               "LIBS=-lgsl -lgslcblas"
               (string-append "prefix=" (assoc-ref %outputs "out"))
               (string-append "HTSDIR=" hts-prefix "/include")
               (string-append "HTSLIB=" hts-prefix "/lib/libhts.so")
               (string-append "BGZIP=" hts-prefix "/bin/bgzip")
               (string-append "TABIX=" hts-prefix "/bin/tabix")
               (string-append "PACKAGE_VERSION=" ,version)))
       #:phases
       (modify-phases %standard-phases
         ;; The test driver hard-codes /bin/bash; point it at the bash
         ;; found on PATH instead.
         (add-before 'check 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
337
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.33")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0kx4awrwby8f33wqyx8w7ms7v25xhf0d421csgf96a3hfzn2mb0m"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; phase fails (returns #f) as soon as one extraction fails,
             ;; instead of ignoring the exit status of "tar".
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t))))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
396
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.27.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32
         "1ndg5yknrxl4djx8ddzgk12rrbiidfpmkkg5z3f95jzryfxarhn8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       ;; Install into the "out" output through the Makefile's "prefix"
       ;; variable.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
431
;; Ribotaper expects fewer columns than the files written by later bedtools
;; releases contain, so keep this older version around for it.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy every file found under the build tree's "bin" directory
         ;; into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "bin" ".*"))
               #t))))))))
458
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                                  "files/RiboTaper/RiboTaper_Version_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Wrap the installed scripts so that R_LIBS_SITE is set to the
         ;; value it has in the build environment.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh"))
               ;; 'for-each' returns an unspecified value; phases must
               ;; return #t to signal success.
               #t))))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
502
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ratschlab/RiboDiff/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper: after the
         ;; "packages=" line of setup.py, insert a "scripts=" line with the
         ;; same leading text.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole-line indent)
                (string-append whole-line "\n"
                               indent "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
546
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         ;; Install the binary, and the man page "awk.1" under the name
         ;; "bioawk.1".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; 'copy-file' does not create missing directories.
               (mkdir-p man)
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin)
               ;; 'install-file' returns an unspecified value; phases
               ;; must return #t to signal success.
               #t))))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
587
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Use 'pypi-uri', like the other PyPI-hosted packages in this
              ;; file, instead of a hand-built pypi.python.org URL.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
619
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.6")
    (source (origin
              (method url-fetch)
              ;; The PyPI distribution lacks the test data (see
              ;; https://github.com/biocore/biom-format/issues/693), so
              ;; fetch the GitHub archive instead.
              (uri (string-append
                    "https://github.com/biocore/biom-format/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "08cr7wpahk6zb31h4bs7jmzpvxcqv9s13xz40h6y2h656jvdvnpj"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The Python 2 variant is defined separately; 'delay' defers the
    ;; reference to it.
    (properties
     `((python2-variant . ,(delay python2-biom-format))))))
654
(define-public python2-biom-format
  ;; Python 2 variant of python-biom-format with one extra build phase in
  ;; front of the inherited arguments.
  (let ((py2-package (package-with-python2
                      (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-package)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ,@(package-arguments py2-package))))))
669
(define-public bioperl-minimal
  ;; PERL-INPUTS is the package's input list; TRANSITIVE-INPUT-NAMES holds
  ;; the package names of the de-duplicated transitive target inputs of
  ;; those inputs.  The names are spliced into the 'wrap-programs' phase.
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (concatenate
                 (map (compose package-transitive-target-inputs cadr)
                      perl-inputs))))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; The archive name uses the version with every "." replaced
         ;; by "-".
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (char)
                                           (if (char=? char #\.)
                                               #\-
                                               char))
                                         version)
                             ".tar.gz"))
         (sha256
          (base32
           "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains
               ;; also the paths of native inputs, we pick the transitive
               ;; target inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/"))
                      (own-modules
                       (string-append out "/lib/perl5/site_perl"))
                      (input-dirs
                       (map (lambda (input-name)
                              (assoc-ref %build-inputs input-name))
                            ',transitive-input-names))
                      (path (string-join (cons own-modules input-dirs)
                                         ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,path))))
                           (find-files bin "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
734
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32
         "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license
     (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
765
;; Python 2 variant, derived from python-biopython.
(define-public python2-biopython (package-with-python2 python-biopython))
768
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from
  ;; source is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
        (revision "1"))
    (package
      (name "bpp-core")
      ;; Base version, revision, and the first seven characters of the
      ;; commit hash.
      (version (string-append "2.2.0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (inputs
       ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of
       ;; the bpp packages with GCC 5.
       `(("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
801
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from
  ;; source is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
        (revision "1"))
    (package
      (name "bpp-phyl")
      ;; Base version, revision, and the first seven characters of the
      ;; commit hash.
      (version (string-append "2.2.0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build
         ;; directory so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ;; GCC 4.8 fails due to an 'internal compiler error', so we use a
         ;; more modern GCC.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
838
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from
  ;; source is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
        (revision "1"))
    (package
      (name "bpp-popgen")
      ;; Base version, revision, and the first seven characters of the
      ;; commit hash.
      (version (string-append "2.2.0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
871
(define-public bpp-seq
  ;; Upstream has not published a release since 2014; building from a git
  ;; clone is the installation method recommended at
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page, so a fixed commit
  ;; is packaged here.
  (let* ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-seq")
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       ;; The build happens inside the source tree because an out-of-source
       ;; build does not copy the test data into the build directory, which
       ;; makes the tests fail.
       '(#:out-of-source? #f
         #:parallel-build? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("gcc" ,gcc-5)))               ; same compiler as 'bpp-core'
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
905
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Every input needs its own label: labels are the keys used to look
       ;; inputs up, so a duplicated label hides the later entry from
       ;; 'assoc-ref'.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
944
(define-public blast+
  (package
    (name "blast+")
    (version "2.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "15n937pw5aqmyfjb6l387d18grqbb96l63d5xj4l7yyh0zbf2405"))
       (patches (search-patches "blast+-fix-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copies of bzip2, zlib and pcre.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; The msbuild directory serves no purpose here.
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; This huge library comes with only two tests, and both of them fail
       ;; with "unparsable timing stats":
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; All later phases work from within the "c++" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the text that replaces a hard-coded path to CMD, or
             ;; #f (after printing a warning) when CMD is not in $PATH.
             (define (tool-replacement cmd)
               (if (string=? cmd "date")
                   ;; Pin the "date" call to the epoch for determinism.
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Replace hard-coded tool locations.
             (let ((build-files
                    '("src/build-system/configure.ac"
                      "src/build-system/configure"
                      "src/build-system/helpers/run_with_lock.c"
                      "scripts/common/impl/if_diff.sh"
                      "scripts/common/impl/run_with_lock.sh"
                      "src/build-system/Makefile.configurables.real"
                      "src/build-system/Makefile.in.top"
                      "src/build-system/Makefile.meta.gmake=no"
                      "src/build-system/Makefile.meta.in"
                      "src/build-system/Makefile.meta_l"
                      "src/build-system/Makefile.meta_p"
                      "src/build-system/Makefile.meta_r"
                      "src/build-system/Makefile.mk.in"
                      "src/build-system/Makefile.requirements"
                      "src/build-system/Makefile.rules_with_autodep.in"))
                   (check-scripts
                    (find-files "scripts/common/check" "\\.sh$")))
               (substitute* (append build-files check-scripts)
                 (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                  ;; Leave the text untouched when no replacement exists.
                  (or (tool-replacement cmd) all))))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S")
                (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; The check script refers to "/var/tmp"; use "/tmp" instead.
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; Keep the helper scripts from resetting PATH.
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (add-before 'configure 'set-HOME
           ;; The configure phase expects $HOME to be set.
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (lib     (string-append (assoc-ref outputs "lib")
                                            "/lib"))
                    (include (string-append (assoc-ref outputs "include")
                                            "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" out)
                           (string-append "--libdir=" lib)
                           (string-append "--includedir=" include)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           (string-append "--with-pcre="
                                          (assoc-ref inputs "pcre"))
                           ;; By default every library gets built twice:
                           ;; once with "-static" in its name and once
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               ;; Options such as '--enable-fast-install' are not understood
               ;; by this 'configure' script, hence the custom invocation.
               (zero? (apply system* "./configure.orig" flags))))))))
    (outputs '("out"                    ;  21 MB
               "lib"                    ; 226 MB
               "include"))              ;  33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; The bulk of the sources is in the public domain.  The exceptions are:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1098
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, sparsehash, zlib, and the
                  ;; .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz sources are deleted by the source snippet, so
         ;; the Makefile must not try to build them.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code the locations of the helper programs: kmc is installed
         ;; by this package, pigz comes from the inputs.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; The custom 'install' phase copies the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1183
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0hwa5r9qbglppb7sz5z79rlmmddr3n51n468jb3wh8rwjgn3yr90"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)
       ("python" ,python-2)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Run upstream's Perl test driver against the freshly built
         ;; executables.  The phase honours #:tests? so that the test suite
         ;; can be switched off like that of any other package.
         (replace 'check
           (lambda* (#:key (tests? #t) #:allow-other-keys)
             (if tests?
                 (zero? (system* "perl"
                                 "scripts/test/simple_tests.pl"
                                 "--bowtie2=./bowtie2"
                                 "--bowtie2-build=./bowtie2-build"))
                 #t))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1239
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                    version ".tar.gz"))
              (sha256
               (base32
                "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
              (patches (search-patches "tophat-build-with-later-seqan.patch"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled SeqAn and samtools
                  (delete-file-recursively "src/SeqAn-1.3")
                  (delete-file-recursively "src/samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; The bundled samtools is removed by the source snippet, so the
         ;; build system and the sources are pointed at the samtools found
         ;; in the build environment instead.
         (add-after 'unpack 'use-system-samtools
           (lambda _
             ;; Stop the Makefile from building and installing the bundled
             ;; samtools library and program.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Replace the name of the bundled executable with the absolute
             ;; file name of "samtools" from $PATH.  The dots are escaped so
             ;; that only the literal name matches, as in the patterns above.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0\\.1\\.18") (which "samtools")))
             ;; Include the samtools headers from their "samtools/"
             ;; sub-directory.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1306
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32
         "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; the Makefile has no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script to run.
         (delete 'configure)
         ;; Copy the executable, the README and the manual page into the
         ;; output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    ;; The code relies on non-portable SSE instructions, so it only builds on
    ;; x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1355
(define-public bwa-pssm
  ;; This package inherits from 'bwa'; every field that is not overridden
  ;; below (the build arguments and the supported systems, for instance) is
  ;; taken from it.
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1384
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32
         "15z2w3bvnc0n4qmb9bd6d8ylc2h2nj883x2w9iixf4x3vki9b22i"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; "setup.py" bootstraps itself through the obsolete "distribute"
           ;; module; strip both the import and the call.
           (substitute* "setup.py"
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") ""))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; The test data is not part of the tarball, so the tests fail.
       #:tests? #f))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1418
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.13.0")
    (source
     (origin
       (method url-fetch)
       ;; The PyPi tarball lacks the test data, so fetch from GitHub.
       (uri (string-append
             "https://github.com/pysam-developers/pysam/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0dzap2axin9cbbl0d825w294bpn00zagfm1sigamm4v2pm5bj9lp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Unbundle htslib.  TODO: Do the same for samtools and bcftools.
           (delete-file-recursively "htslib")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         ;; Tell the build to use the htslib from the inputs, and pass the
         ;; ncurses-related flags.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR" (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1")
               #t)))
         (replace 'check
           (lambda _
             ;; Append the first non-hidden sub-directory of "build" to
             ;; PYTHONPATH.
             (let ((build-subdir
                    (car (scandir "build"
                                  (lambda (entry)
                                    (not (string-prefix? "." entry)))))))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (getcwd) "/build/" build-subdir)))
             ;; Leave the source directory so that Python does not import
             ;; the package from the current working directory.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               ;; Only the test data gets generated.  Running nosetests
               ;; without explicitly requesting a single process crashes,
               ;; and running it with several processes fails because the
               ;; tests are not designed to run in parallel.
               ;;
               ;; FIXME: tests keep timing out on some systems.
               ;; (zero? (system* "nosetests" "-v"
               ;;                 "--processes" "1"))
               (and (zero? (system* "make" "-C" "pysam_data"))
                    (zero? (system* "make" "-C" "cbcf_data")))))))))
    (propagated-inputs
     `(("htslib" ,htslib)))             ; its headers are included by the
                                        ; installed header files
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; The remaining dependencies are needed for the tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1498
;; Variant of 'python-pysam' produced by 'package-with-python2'.
(define-public python2-pysam
  (package-with-python2
   python-pysam))
1501
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "twobitreader" version))
       (sha256
        (base32
         "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
    (build-system python-build-system)
    (arguments
     ;; The PyPi release does not contain the tests.
     ;; TODO: Build from the Git repository instead, or ask the upstream
     ;; maintainer to include the tests in the PyPi release.
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
1526
;; Variant of 'python-twobitreader' produced by 'package-with-python2'.
(define-public python2-twobitreader
  (package-with-python2
   python-twobitreader))
1529
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32
         "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    (arguments
     ;; The tarball lacks some of the files that the tests need.
     '(#:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
1562
;; Variant of 'python-plastid' produced by 'package-with-python2'.
(define-public python2-plastid
  (package-with-python2
   python-plastid))
1565
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32
         "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; upstream provides no tests
       #:make-flags
       ;; The Makefile copies the executables straight into PREFIX, so
       ;; PREFIX has to name the "bin" directory itself.
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "PREFIX=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script to run.
         (delete 'configure)
         ;; Strip the embedded build date and time, which make the build
         ;; non-deterministic.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; "make install" expects the target directory to exist already.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))
               #t))))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; Quoting the manual: "It can be copied under the GNU General Public
    ;; License version 2 (GPLv2)."
    (license license:gpl2)))
1614
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop a setup-time dependency that is not needed.
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           ;; Delete the shared objects and the "dist/" directory shipped
           ;; in the tarball.
           (delete-file "clipper/src/peaks.so")
           (delete-file "clipper/src/readsToWiggle.so")
           (delete-file-recursively "dist/")
           #t))))
    (build-system python-build-system)
    ;; Upstream supports Python 2 only.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("htseq" ,python2-htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    ;; All native inputs are needed by the tests only.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
1660
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the documentation, the "QuarryFiles" data directory, the
         ;; executable and the helper script by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Signal success explicitly, like every other phase, rather
               ;; than relying on the value returned by 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    ;; QUARRY_PATH is searched for the "QuarryFiles" directory that the
    ;; 'install' phase copies into the output.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
1700
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Nothing is configured or built; the files are installed as is.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "src" (string-append out "/src"))
               (mkdir bin)
               ;; Let the script find the modules in the installed "src"
               ;; directory by appending the output to its lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (install-file "couger" bin))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Run 'couger' with the PYTHONPATH of the build environment.
             (let* ((path (getenv "PYTHONPATH"))
                    (program (string-append (assoc-ref outputs "out")
                                            "/bin/couger")))
               (wrap-program program
                 `("PYTHONPATH" ":" prefix (,path))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
1771
;; Clustal-Omega is built with the stock gnu-build-system phases; its only
;; declared input is argtable.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
1794
;; CrossMap is built with Python 2.  The bundled pysam copy is deleted from
;; the source and an environment variable is set after unpacking so that the
;; patched build uses the pysam input instead.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
       ;; The patch below was submitted upstream and can be downloaded from
       ;; Sourceforge, but upstream has not merged it.
       (patches (search-patches "crossmap-allow-system-pysam.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of pysam.
           (delete-file-recursively "lib/pysam")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             #t)))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1835
;; Cutadapt is fetched from the upstream Git repository at the tag "v" +
;; version and built with the default python-build-system phases.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.16")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marcelm/cutadapt.git")
             (commit (string-append "v" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "09pr02067jiks19nc0aby4xp70hhgvb554i2y1c04rv1m401w7q8"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    (inputs
     `(("python-xopen" ,python-xopen)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1861
;; libBigWig has no configure script: the compiler and installation prefix
;; are passed as make flags, and the test suite is run via the "test" target.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'disable-curl-test
           (lambda _
             ;; Blank out every Makefile fragment matching the testRemote
             ;; invocation before the test target is run.
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; Upstream commit 4ff6959cd8a0 fixes this, but no release
         ;; containing that change has been made yet.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix-dir (assoc-ref outputs "out")))
               (for-each (lambda (sub-dir)
                           (mkdir-p (string-append prefix-dir sub-dir)))
                         '("/lib" "/include"))
               #t))))))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
1907
;; pyBigWig bundles a copy of libBigWig.  The copy is removed from the source
;; and setup.py is edited to add "BigWig" to its list of libraries.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled libBigWig sources.
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             ;; Prepend "BigWig" to the library list in setup.py.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
1943
;; Python 2 variant, derived mechanically from python-pybigwig.
(define-public python2-pybigwig
  (package-with-python2
   python-pybigwig))
1946
;; DendroPy from PyPI, with a patch applied to its tests.  The 'properties'
;; field points at the hand-written Python 2 variant, python2-dendropy.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "DendroPy" version))
       (sha256
        (base32
         "15c7s3d5gf19ljsxvq5advaa752wfi7pwrdjyhzmg85hccyvp47p"))
       (patches
        (search-patches "python-dendropy-fix-tests.patch"))))
    (build-system python-build-system)
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)
    (properties
     `((python2-variant . ,(delay python2-dendropy))))))
1968
;; Python 2 variant of python-dendropy.  It inherits from the mechanically
;; converted package but replaces the 'check' phase and adds nose to the
;; native inputs.
(define-public python2-dendropy
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-dendropy))))
    (package
      (inherit py2-base)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           (replace 'check
             ;; A test currently fails on some systems, and only when run
             ;; through "setup.py test", so call nosetests directly.
             (lambda _
               (zero? (system* "nosetests")))))))
      (native-inputs
       `(("python2-nose" ,python2-nose)
         ,@(package-native-inputs py2-base))))))
1983
;; py2bit from PyPI, built with the default python-build-system phases and
;; no additional inputs.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "py2bit" version))
       (sha256
        (base32
         "1cdf4qlmgwsh1f4k0wdv2sr8x9qn4366p0k3614vbd0fpqiarxrl"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2002
;; deepTools from the GitHub release tarball, built with the default
;; python-build-system phases.  The native inputs are needed by the tests.
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/deeptools/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1q8i12l2gvk4n2s8lhyzwhh9g4qbc8lrk5l7maz00yvd5g6z5540"))))
    (build-system python-build-system)
    (native-inputs
     ;; All three are used by the test suite only.
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-pytz" ,python-pytz)))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)))
    (home-page "https://github.com/deeptools/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2040
;; Delly has no configure script and no usable install target, so the
;; 'configure' phase is deleted and 'install' is replaced by a phase that
;; copies the three built programs and the exclude templates by hand.
(define-public delly
  (package
    (name "delly")
    (version "0.7.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/tobiasrausch/delly/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0dkwy3pyxmi6dhh1lpsr3698ri5sslw9qz67hfys0bz8dgrqwabj"))
       (patches (search-patches "delly-use-system-libraries.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                    ; There are no tests to run.
       #:make-flags '("PARALLEL=1")   ; Allow parallel execution at run-time.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)          ; There is no configure phase.
         (replace 'install
           ;; Take the output directory from the phase's own 'outputs'
           ;; argument, like the other install phases in this file, rather
           ;; than from the global %outputs.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out       (assoc-ref outputs "out"))
                    (bin       (string-append out "/bin"))
                    (templates (string-append out "/share/delly/templates")))
               (mkdir-p bin)
               (mkdir-p templates)
               (copy-recursively "excludeTemplates" templates)
               (install-file "src/cov" bin)
               (install-file "src/delly" bin)
               (install-file "src/dpe" bin)
               ;; Report success explicitly instead of returning the
               ;; unspecified value of the last 'install-file' call.
               #t))))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (home-page "https://github.com/tobiasrausch/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2087
;; DIAMOND is built with CMake.  The "-march=native" flag is stripped from
;; CMakeLists.txt after unpacking.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1cf98vcsiwcv3c4apg50w1240v1mpw0zln1sdw3g692dqa4y041z"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
2121
;; Discrover is built with CMake.  Two headers are patched after unpacking
;; to add "#include <random>" right after their include-guard define.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maaskola/discrover/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'add-missing-includes
           (lambda _
             ;; Each substitution keeps the matched guard line and appends
             ;; the include on the following line.
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t)))))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
2160
;; EIGENSOFT is built from a pinned Git commit.  The version string combines
;; the base version, a packaging revision and the first nine characters of
;; the commit hash.
(define-public eigensoft
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-"
                              revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32
           "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Replace the directory of pre-built binaries with an empty one.
         (snippet
          '(begin
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:make-flags '("CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; There is nothing to configure, but the Makefile lives in a
           ;; sub-directory, so enter it here.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags in the Makefile are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The Makefile's install target merely copies the executables
           ;; to the "bin" directory in the build root, so copy them from
           ;; there to the output.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin-dir (string-append (assoc-ref outputs "out")
                                             "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin-dir))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The eigensoft tools themselves are under the Expat license, but
      ;; because they link with the GNU Scientific Library (GSL) the
      ;; effective license is the GPL.
      (license license:gpl3+))))
2228
;; Entrez Direct is a Perl script.  The 'configure' and 'build' phases are
;; deleted; 'install' copies edirect.pl to bin/, and a wrapper records the
;; build-time PERL5LIB so the script finds its Perl inputs at run time.
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                           "versions/2016-05-03/edirect.tar.gz"))
       (sha256
        (base32
         "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path))))
             ;; Report success explicitly rather than relying on whatever
             ;; 'wrap-program' happens to return.
             #t)))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2293
;; Exonerate is built with the stock gnu-build-system phases; only parallel
;; building is switched off.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description previously read "using a many alignment models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2323
;; eXpress is built with CMake.  Its CMake files are patched after unpacking
;; to stop requesting static Boost libraries, to use the bamtools input in
;; place of the in-tree bamtools paths, and to refer to libprotobuf.so
;; instead of libprotobuf.a.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://bio.math.berkeley.edu/eXpress/downloads/express-"
         version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools-dir (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools-dir "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools-dir "/lib"))
                 (("libprotobuf.a") "libprotobuf.so")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2368
;; Express Beta Diversity is built from the "source" sub-directory.  The
;; 'check' phase runs the freshly built binary with "-u", and the 'install'
;; phase copies the binary and a conversion script into bin/.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Build and check from inside "source", then step back out so
         ;; that 'install' can use paths relative to the top directory.
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin")))
               (mkdir-p bin-dir)
               (install-file "scripts/convertToEBD.py" bin-dir)
               (install-file "bin/ExpressBetaDiversity" bin-dir)
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2411
;; FastTree is distributed as a single C file, so the 'unpack' and
;; 'configure' phases are deleted and the 'build' phase compiles the source
;; file directly, once without and once with OpenMP.
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32
         "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Run gcc on SOURCE to produce PROGRAM.  EXTRA-FLAGS are
             ;; placed ahead of the flags shared by both builds.  Returns
             ;; #t when gcc exits with status zero.
             (define (cc program . extra-flags)
               (zero? (apply system* "gcc"
                             (append extra-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o" program
                                           source
                                           "-lm")))))
             ;; The OpenMP build is only attempted if the plain one worked.
             (and (cc "FastTree")
                  (cc "FastTreeMP" "-DOPENMP" "-fopenmp"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin")))
               (mkdir-p bin-dir)
               (install-file "FastTree" bin-dir)
               (install-file "FastTreeMP" bin-dir)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2468
;; The FASTX-Toolkit release tarball is built with the stock
;; gnu-build-system phases; no arguments are overridden.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/agordon/fastx_toolkit/releases/download/"
         version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
2500
;; Flexbar is built with CMake, which is told to place the binary directly
;; in the output's bin/ directory.  The 'install' phase is therefore deleted,
;; and the 'check' phase puts that directory on PATH before running the
;; validation script shipped with the sources.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/flexbar/"
                       version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32
         "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; Build the source directory name from the package version,
             ;; so that it follows version updates instead of having to be
             ;; edited by hand.
             (chdir (string-append "../flexbar_v" ,version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2545
;; FragGeneScan has no configure script and no test target.  The sources are
;; patched to call rm, mv and awk by absolute file name and to look for the
;; training files under share/fraggenescan/train; installation is done by
;; hand, and two of the bundled examples are run afterwards as a check.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Note the trailing slash: "train/" is appended
                    ;; directly in both substitutions below.
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX.  Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  ;; SHARE already ends in "/", so do not add another one.
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _ (and (zero? (system* "make" "clean"))
                          (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share)
               ;; Report success explicitly instead of returning the
               ;; unspecified value of 'copy-recursively'.
               #t)))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run two of the examples and check
           ;; that the output files of the first one get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               (and (zero? (system* frag ; Test complete genome.
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out")
                    (zero? (system* ; Test incomplete sequences.
                            frag
                            "-genome=./example/NC_000913-fgs.ffn"
                            "-out=out"
                            "-complete=0"
                            "-train=454_30")))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2629
(define-public fxtract
  ;; Commit of the ctSkennerton/util repository whose contents are copied
  ;; into the "util" directory of the fxtract sources before building.
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; The URI ends in a bare "VERSION.tar.gz", so give the tarball a
         ;; name that identifies this package rather than the util checkout.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Swap the "util" directory of the source tree for the contents
           ;; of the separately fetched "ctskennerton-util" input.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2696
(define-public gemma
  (package
    (name "gemma")
    (version "0.96")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/xiangzhou/GEMMA/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (patches (search-patches "gemma-intel-compat.patch"))
       (sha256
        (base32
         "055ynn16gd12pf78n4vr2a9jlwsbwzajpdnf2y2yilg1krfff222"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included yet
       ;; "FORCE_DYNAMIC=1" is passed on every system; only the flag that
       ;; follows it depends on the system being built for.
       #:make-flags
       '("FORCE_DYNAMIC=1"
         ,@(match (%current-system)
             ("x86_64-linux" '())
             ("i686-linux" '("FORCE_32BIT=1"))
             (_ '("NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the "bin" directory before the build phase runs.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         ;; Install the "bin/gemma" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "bin/gemma" bin)
               #t))))))
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("zlib" ,zlib)))
    (home-page "https://github.com/xiangzhou/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (license license:gpl3)))
2746
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/nboley/grit/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((numpy-include
                    (string-append
                     (assoc-ref inputs "python-numpy")
                     "/lib/python2.7/site-packages/numpy/core/include/")))
               ;; Remove the bundled C files so that they are generated
               ;; afresh from the pyx sources.
               (for-each delete-file
                         '("grit/sparsify_support_fns.c"
                           "grit/call_peaks_support_fns.c"))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Compilation needs the numpy headers on the include path.
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-include
                                 "']")))
               #t))))))
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2796
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               ;; 'substitute*' matches each line together with its
               ;; trailing newline and ".*" consumes it, so the replacement
               ;; must end in a newline; otherwise the next Makefile line
               ;; is appended to the variable's value.
               (("^CC = .*$") "CC = gcc\n")
               (("^CPP = .*$") "CPP = g++\n")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               ;; NOTE(review): these patterns also consume the remainder of
               ;; the line; they are left as they were -- confirm against
               ;; the upstream Makefile before changing them.
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             ;; Point the scripts at the 'env' found on PATH at build time.
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         ;; Install every file whose name matches the hisat executable
         ;; pattern into the "bin" directory of the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2863
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       ;; FIXME: a better source URL is
       ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
       ;;                "/downloads/hisat2-" version "-source.zip")
       ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
       ;; but it is currently unavailable.
       (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Replace the `date` invocation in the Makefile with a constant.
         (add-after 'unpack 'make-deterministic
           (lambda _
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         ;; Copy the executables and the HTML manual into the output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (doc (string-append out "/share/doc/hisat2/"))
                    (executables
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bin) executables)
               (mkdir-p doc)
               (install-file "doc/manual.inc.html" doc))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later.  The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
2926
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is named after the major version.
       (uri (string-append "http://eddylab.org/software/hmmer"
                           (version-major version) "/"
                           version "/hmmer-" version ".tar.gz"))
       (patches (search-patches "hmmer-remove-cpu-specificity.patch"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
2957
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32
         "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
2983
;; Variant of 'htseq' produced by 'package-with-python2'.
(define-public python2-htseq (package-with-python2 htseq))
2986
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1ibhzzxsfc38nqyk9r8zqj6blfc1kh26iirypd4q6n90hs2m6nyq"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Internet access
       #:jdk ,icedtea-8
       ;; Have ant place its "dist" directory directly in the output.
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3036
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         ;; Delete the build.xml file found in the checkout.
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3079
3080 ;; This is needed for picard 2.10.3
(define-public java-htsjdk-2.10.1
  ;; Only the version and the source differ from 'java-htsjdk-latest'; the
  ;; build system and its arguments are inherited instead of being restated,
  ;; so the two definitions cannot drift apart.
  (package (inherit java-htsjdk-latest)
    ;; 'name' has to be given here because the 'source' field refers to it.
    (name "java-htsjdk")
    (version "2.10.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))))
3106
3107 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
3108 ;; recent version of java-htsjdk, which depends on gradle.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32
         "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" matched)
              (string-append matched "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (let ((htsjdk (assoc-ref %build-inputs "java-htsjdk")))
         (list (string-append "-Dhtsjdk_lib_dir="
                              htsjdk
                              "/share/java/htsjdk/")
               "-Dhtsjdk-classes=dist/tmp"
               (string-append "-Dhtsjdk-version="
                              ,(package-version java-htsjdk))))
       #:phases
       (modify-phases %standard-phases
         ;; Make build.xml look for the htsjdk jars in the "java-htsjdk"
         ;; input.
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk (assoc-ref inputs "java-htsjdk")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib")
                  (string-append htsjdk "/share/java/htsjdk/")))
               #t)))
         ;; Drop "compile" from the dependencies of the "test" target.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3184
3185 ;; This is needed for dropseq-tools
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32
         "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "picard.jar"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:jdk ,icedtea-8
       #:main-class "picard.cmdline.PicardCommandLine"
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.  build.xml is read as SXML, every
         ;; "target" element whose name is "manifest" gets an extra "echo"
         ;; child appending a Class-Path entry to ${manifest.file}, and the
         ;; result is written back over build.xml.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             (chmod "build.xml" #o664)
             (call-with-output-file "build.xml.new"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `((target . ,(lambda (tag . children)
                                  (let ((target-names
                                         ((sxpath '(name *text*))
                                          (car children)))
                                        ;; FIXME: We're breaking the line
                                        ;; early with a dummy path to
                                        ;; ensure that the store reference
                                        ;; isn't broken apart and can still
                                        ;; be found by the reference
                                        ;; scanner.
                                        (class-path
                                         (format #f
                                                 "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}"
                                                 ;; maximum line length is 70
                                                 (string-tabulate (const #\b) 57)
                                                 (assoc-ref inputs "java-htsjdk"))))
                                    (if (member "manifest" target-names)
                                        `(,tag ,@children
                                               (echo
                                                (@ (message ,class-path)
                                                   (file "${manifest.file}")
                                                   (append "true"))))
                                        `(,tag ,@children)))))
                     ;; Every other element and all text pass through
                     ;; unchanged.
                     (*default* . ,(lambda (tag . children) `(,tag ,@children)))
                     (*text* . ,(lambda (_ txt) txt))))
                  port)))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3268
3269 ;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  (package (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/picard.git")
                    (commit version)))
              (file-name (string-append "java-picard-" version "-checkout"))
              (sha256
               (base32
                "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete pre-built binaries.
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       #:jdk ,icedtea-8
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
                (string-append (assoc-ref inputs "ant")
                               "/lib/ant.jar")))
             #t))
         ;; Make "test" depend on "compile-tests" directly and stop
         ;; "compile" from building the tests.
         (add-after 'unpack 'make-test-target-independent
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Default the deflater library path to the copy installed by the
         ;; 'install-jni-lib phase instead of null.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (substitute* "src/java/net/sf/samtools/Defaults.java"
               (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                               (assoc-ref outputs "out")
                               "/lib/jni/libIntelDeflater.so"
                               "\")")))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one.  This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "lib/jni")
             (mkdir-p "jdk-src")
             (and (zero? (system* "tar" "--strip-components=1" "-C" "jdk-src"
                                  "-xf" (assoc-ref inputs "jdk-src")))
                  (zero? (system* "javah" "-jni"
                                  "-classpath" "classes"
                                  "-d" "lib/"
                                  "net.sf.samtools.util.zip.IntelDeflater"))
                  (with-directory-excursion "src/c/inteldeflater"
                    ;; Chain the compile and link steps with 'and' so that
                    ;; a failed compilation fails the phase; as two separate
                    ;; body expressions the first exit status was ignored.
                    (and (zero? (system* "gcc" "-I../../../lib" "-I."
                                         (string-append "-I" (assoc-ref inputs "jdk")
                                                        "/include/linux")
                                         "-I../../../jdk-src/src/share/native/common/"
                                         "-I../../../jdk-src/src/solaris/native/common/"
                                         "-c" "-O3" "-fPIC" "IntelDeflater.c"))
                         (zero? (system* "gcc" "-shared"
                                         "-o" "../../../lib/jni/libIntelDeflater.so"
                                         "IntelDeflater.o" "-lz" "-lstdc++")))))))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (zero? (apply system* `("ant" "all" ,@make-flags)))))
         ;; JAVA6_HOME is set to the same value as JAVA_HOME.
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni (string-append (assoc-ref outputs "out")
                                       "/lib/jni")))
               (mkdir-p jni)
               (install-file "lib/jni/libIntelDeflater.so" jni)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant) ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
3382
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Point build.xml at the jars provided by the inputs instead of
         ;; the jar file names it refers to by default.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (string-append (assoc-ref inputs "java-jbzip2")
                               "/share/java/jbzip2.jar"))
               (("sam-1.103.jar")
                (string-append (assoc-ref inputs "java-picard-1.113")
                               "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (string-append (assoc-ref inputs "java-cisd-jhdf5")
                               "/share/java/sis-jhdf5.jar")))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (bin   (string-append out "/bin"))
                    ;; No trailing slash here: "/fastqc" is appended below,
                    ;; and a trailing slash would yield a doubled separator
                    ;; in the symlink target.
                    (share (string-append out "/share/fastqc"))
                    (exe   (string-append share "/fastqc")))
               (for-each mkdir-p (list bin share))
               (copy-recursively "bin" share)
               ;; Have the wrapper script run the java executable of the
               ;; "java" input.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
3461
(define-public htslib
  (package
    (name "htslib")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "18bw0mn9pj5wgarnlaxmf1bb8pdqgl1zd6czirqcr62ajpn1xvy0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openssl" ,openssl)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; Everything is under the Expat license, except for the files under
    ;; cram/, which are under the modified BSD license.
    (license (list license:expat license:bsd-3))))
3490
;; Older HTSlib release.  It is not exported and should be deleted as soon as
;; no package relies upon it any longer.
(define htslib-1.3
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
3504
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (home-page "https://github.com/nboley/idr")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1rjdly6daslw66r43g9md8znizlscn1sphycqyldzsidkc4vxqv3"))
       ;; Remove the generated C code that ships with the tarball.
       (snippet
        '(begin
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    ;; Tests are switched off.  The sole test, "test_inv_cdf.py", exercises
    ;; features that this package no longer contains, and it asserts False,
    ;; so it could never pass anyway.
    (arguments
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
3540
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.7")
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "1a1iwq9pq54k2m9ypvwl5s0bqfl64gwh9dx5af9i382ajas2016q"))))
    (build-system gnu-build-system)
    ;; "out" holds the library; the Ruby and Python bindings each get an
    ;; output of their own.
    (outputs '("out" "ruby" "python"))
    (arguments
     `(#:configure-flags
       ;; Emit one "--enable-LANG-binding=DIR" flag per binding, where DIR is
       ;; the output of the same name.
       (map (lambda (language)
              (string-append "--enable-" language "-binding="
                             (assoc-ref %outputs language)))
            '("ruby" "python"))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; The tests are run by generator_manager.hpp, which uses either
             ;; /bin/sh or $SHELL; point $SHELL at bash.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (inputs
     `(("htslib" ,htslib)))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; Upstream's website states that JELLYFISH runs on 64-bit
    ;; Intel-compatible processors.
    (supported-systems '("x86_64-linux"))
    ;; As a whole the work is GPLv3 or later; some individual files, for
    ;; instance lib/jsoncpp.cpp, carry the Expat license.
    (license (list license:gpl3+ license:expat))))
3594
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (home-page "https://khmer.readthedocs.org/")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (sha256
        (base32 "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
       (patches (search-patches "khmer-use-libraries.patch"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Get rid of the bundled compression libraries.
             (for-each delete-file-recursively
                       '("third-party/zlib" "third-party/bzip2"))
             ;; Swap the bundled seqan for the headers of the "seqan" input.
             (let ((bundled-seqan "third-party/seqan"))
               (delete-file-recursively bundled-seqan)
               (copy-recursively
                (string-append (assoc-ref inputs "seqan") "/include/seqan")
                (string-append bundled-seqan "/core/include/seqan")))
             ;; The bundled MurmurHash is kept on purpose: its canonical
             ;; repository, 'SMHasher', is unsuitable for providing a
             ;; library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; Testing is simpler once the package is installed, so the regular
         ;; 'check' phase is replaced by a post-install one.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (python (assoc-ref inputs "python"))
                    ;; First three of the last five characters of the
                    ;; "python" input's file name -- presumably the
                    ;; major.minor version.
                    (python-version
                     (string-take (string-take-right python 5) 3)))
               ;; Make the freshly installed programs and modules visible.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH") ":" out
                                      "/lib/python" python-version
                                      "/site-packages"))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)
       ;; Compiling with gcc-5 makes the tests fail, so stay on gcc-4.9 at
       ;; least until the next khmer release (likely 2.1).
       ("gcc" ,gcc-4.9)))
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; Building on i686, armhf and mips64el fails with:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
3678
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.2")
    (home-page "http://kaiju.binf.ku.dk/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bioinformatics-centre/kaiju/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1kdn4rxs0kr9ibmrgrfcci71aa6j6gr71dbc8pff7731rpab6kj7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; upstream ships no tests
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure step; the build is run from "src".
         (delete 'configure)
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin")))
               (mkdir-p target)
               ;; Go back up from "src"; both "bin" and "util" are copied
               ;; into the output's bin directory.
               (chdir "..")
               (copy-recursively "bin" target)
               (copy-recursively "util" target))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
3717
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (home-page "https://github.com/taoliu/MACS/")
    (source
     (origin
       (method url-fetch)
       ;; The project is published on PyPI under the name "MACS2".
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32 "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; works with Python 2.7 only
       #:tests? #f))                    ; there is no test target
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
3744
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       ;; There is no configure script (the phase is deleted below), so the
       ;; installation directories are handed to make directly.
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile and sources to build live in the "core" directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must spell
               ;; "f2cl" exactly like the replacement does; with the former
               ;; "2cl" it could not match the text that the replacement
               ;; implies, leaving mafft-profile in PROGS.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; NOTE(review): the ")#" replacement below differs from the
               ;; plain "#" above and looks deliberate (closing a shell
               ;; group before commenting out the remainder) -- confirm
               ;; against the upstream Makefile before touching it.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") ")#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace references to perl, awk and grep with the absolute
             ;; file names found on PATH at build time.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; No trailing ":" here: 'wrap-program' inserts the
                    ;; separator itself, and a doubled "::" would add an
                    ;; empty PATH entry, which stands for the current
                    ;; directory.
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin")))
               ;; Wrap every installed file so that coreutils is found at
               ;; run time.
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
3830
(define-public mash
  (package
    (name "mash")
    (version "2.0")
    (home-page "https://mash.readthedocs.io")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marbl/mash/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "00fx14vpmgsijwxd1xql3if934l82v8ckqgjjyyhnr36qb9qrskv"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled copy of kseq.
           ;; TODO: Do the same for the bundled murmurhash and open bloom
           ;; filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:configure-flags
       ;; Produce "--with-capnp=DIR" and "--with-gsl=DIR", DIR being the
       ;; store directory of the corresponding build input.
       (map (lambda (flag+input)
              (string-append "--with-" (car flag+input) "="
                             (assoc-ref %build-inputs (cdr flag+input))))
            '(("capnp" . "capnproto")
              ("gsl" . "gsl")))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; The bundled kseq.h was deleted from the source, so include
             ;; the header under its htslib path instead.
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'autoconf
           (lambda _
             (zero? (system* "autoconf")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Both capnproto and htslib end up statically embedded in the final
       ;; application, which is why their licenses are listed below too.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3890
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1hmvdalz3zj5sqqklg0l4npjdv37cv2hsdi1al9iby2ndxjs1b73"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
             (string-append "BOOST_ROOT=" (assoc-ref %build-inputs "boost")))
       #:tests? #f                      ; the build phase already runs them
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; Rewrite the "bam/..." includes to the samtools and htslib
             ;; header locations.
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-dir (assoc-ref inputs "htslib"))
                   (samtools-dir (assoc-ref inputs "samtools")))
               ;; Point SConstruct at the htslib and samtools inputs.
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib-dir "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools-dir "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools-dir "/lib'"))
                 ;; The README is not to be distributed.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; Restricted to x86 because the source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
3962
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/ctSkennerton/minced/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version: the expected output still
             ;; mentions the 0.1.6 version string.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (wrapper (string-append bin "/minced")))
               ;; Minced comes with a wrapper script that tries to figure out where
               ;; it is located before running the JAR. Since these paths are known
               ;; to us, we build our own wrapper to avoid coreutils dependency.
               (install-file "minced.jar" bin)
               (with-output-to-file wrapper
                 (lambda _
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin "/minced.jar \"$@\"\n"))))
               ;; Make the generated wrapper read-only and executable.
               (chmod wrapper #o555)
               ;; Report success explicitly, like the other phases do,
               ;; rather than returning chmod's unspecified value.
               #t))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
4018
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32 "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Without setuptools the executables do not get installed.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; Python 2 is the only supported one
       #:tests? #f))                    ; there is no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock)    ; needed by the tests
       ("python-pytz" ,python2-pytz)))  ; needed by the tests
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
4066
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source (origin
              ;; The fetch method is the "tarbomb" variant of url-fetch.
              (method url-fetch/tarbomb)
              (uri (string-append
                    "http://www.drive5.com/muscle/muscle_src_"
                    version ".tar.gz"))
              (sha256
               (base32
                "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so just test if it runs.
           (lambda _ (zero? (system* "./muscle" "-version"))))
         (replace 'install
           ;; This phase copies the "muscle" binary into the output's bin
           ;; directory by hand.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "muscle" bin)
               ;; Report success explicitly instead of returning the
               ;; unspecified value of 'install-file'.
               #t))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
4100
(define-public newick-utils
  ;; No recent release exists, hence the package is built from a git commit.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." (string-take commit 8)))
      (home-page "https://github.com/tjunier/newick_utils")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Run autoreconf right after unpacking the checkout.
           (add-after 'unpack 'autoconf
             (lambda _
               (zero? (system* "autoreconf" "-vif")))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: The Lua and Guile bindings still need to be enabled.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4139
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (home-page "https://github.com/wwood/OrfM")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    ;; Ruby and its gems are native (build-time) inputs only; zlib is the
    ;; single regular input.
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4164
(define-public pplacer
  (let ((commit "g807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/matsen/pplacer/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0z1lnd2s8sh6kpzg106wzbh2szw7h0hvq8syd5a6wv4rmyyz6x0f"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.01
         #:findlib ,ocaml4.01-findlib
         ;; (ice-9 ftw) provides 'scandir', used by the cddlib phase below.
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
                      (local-dir "cddlib_guix"))
                 (mkdir local-dir)
                 ;; Unpack the cddlib tarball into a scratch directory.  Stop
                 ;; right here when tar fails, rather than letting the error
                 ;; surface later as an obscure failure in the directory
                 ;; listing below.
                 (with-directory-excursion local-dir
                   (unless (zero? (system* "tar" "xvf" cddlib-src))
                     (error "failed to unpack the cddlib source:"
                            cddlib-src)))
                 ;; The third entry of the listing is taken to be the
                 ;; unpacked top-level directory (the first two presumably
                 ;; being "." and "..").
                 (let ((cddlib-src-folder
                        (string-append local-dir "/"
                                       (list-ref (scandir local-dir) 2)
                                       "/lib-src")))
                   ;; Copy cddlib's C sources and headers into cdd_src/.
                   (for-each
                    (lambda (file)
                      (copy-file file
                                 (string-append "cdd_src/" (basename file))))
                    (find-files cddlib-src-folder ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'; the version and commit are
               ;; written in directly instead.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (copy-recursively "bin" bin))
               #t)))))
      (native-inputs
       `(("zlib" ,zlib)
         ("gsl" ,gsl)
         ("ocaml-ounit" ,ocaml4.01-ounit)
         ("ocaml-batteries" ,ocaml4.01-batteries)
         ("ocaml-camlzip" ,ocaml4.01-camlzip)
         ("ocaml-csv" ,ocaml4.01-csv)
         ("ocaml-sqlite3" ,ocaml4.01-sqlite3)
         ("ocaml-xmlm" ,ocaml4.01-xmlm)
         ("ocaml-mcl" ,ocaml4.01-mcl)
         ("ocaml-gsl" ,ocaml4.01-gsl)
         ("cddlib-src" ,(package-source cddlib))))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "http://matsen.fhcrc.org/pplacer")
      (license license:gpl3))))
4247
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-scripts-dir
           (lambda _
             (chdir "scripts")
             ;; Report success explicitly instead of returning the
             ;; unspecified value of 'chdir'.
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "python" "-m" "unittest" "discover" "-v"))))
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; refpkg_align.py gets both hmmer and infernal on its PATH.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin:"
                            (assoc-ref inputs "infernal") "/bin")))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               ;; hrefpkg_query.py gets hmmer only.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin")))
                 (wrap-program (string-append bin "/hrefpkg_query.py")
                   `("PATH" ":" prefix (,path)))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
4287
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    ;; Python 2.7 is required by pbcore.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
4315
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (home-page "https://github.com/PMBio/warpedLMM")
    (source
     (origin
       (method url-fetch)
       ;; The release is a zip archive, hence "unzip" among the native
       ;; inputs.
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    ;; Python 2.7 is required.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
4350
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.3." (string-take commit 7)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the Cython source tarball bundled with the checkout.
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: The tests are disabled because they fail with "No such
         ;; file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; This setup.py hack triggers a build error with setuptools 18.0
           ;; and later, so turn its condition into a constant False.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
4407
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32
         "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Both phases below are inserted directly after 'unpack, so the
         ;; one added last ('remove-m64-flag) runs first, while the
         ;; working directory is still the top of the source tree.
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         (add-after 'unpack 'remove-m64-flag
           ;; Without "-m64" Prank still builds with the right word size,
           ;; and it becomes buildable on 32-bit machines too.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (mandir (string-append out "/share/man/man1"))
                    ;; Colon-separated "bin" directories of the helper
                    ;; programs, prepended to PATH by the wrapper.
                    (tool-path
                     (string-join
                      (map (lambda (tool)
                             (string-append (assoc-ref %build-inputs tool)
                                            "/bin"))
                           '("mafft" "exonerate" "bppsuite"))
                      ":")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,tool-path)))
               (install-file "prank.1" mandir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
4465
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "5.16b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/"
             "proteinortho_v" version "_src.tar.gz"))
       (sha256
        (base32
         "1wl0dawpssqwfjvr651r4wlww8hhjin8nba6xh71ks7sbypx886j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; No configure script exists; rewrite the Makefile's
           ;; INSTALLDIR so that it points at our "bin" directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; 'make install' does not create its target directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         (add-after 'install 'wrap-programs
           ;; Wrap the main script so that the PATH seen during the build
           ;; is prepended to PATH when it runs.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl"))
                   (search-path (getenv "PATH")))
               (wrap-program script
                 `("PATH" ":" prefix (,search-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
4517
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       ;; The download URL ends in "v<version>.tar.bz2"; store it under a
       ;; name that includes the package name.
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The count in the prose must match the number of @item entries below
    ;; (seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
4555
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/hyattpd/Prodigal/archive/v"
             version ".tar.gz"))
       ;; The archive is named "v<version>.tar.gz" upstream; store it under
       ;; a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; Pass the installation directory to make through INSTALLDIR.
       #:make-flags
       (list (string-append "INSTALLDIR=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
4586
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly, with the in-tree scripts and modules on the
             ;; search paths.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Run every test file (not just up to the first failure) and
             ;; succeed only when the list of failed files is empty.
             (null? (filter (lambda (file)
                              (display file)(display "\n")
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every installed script except the R one.  The file
               ;; names come from the source tree's "bin" directory, so
               ;; each PROG is of the form "bin/<name>".
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets its own wrapper, with
               ;; R_LIBS_SITE and a PATH that contains coreutils.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
4702
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.10")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Provide a generic "raxml" command next to the binary.
               ;; 'symlink' takes the link target first and the name of the
               ;; link second, so the link name must be a path inside BIN;
               ;; a bare "raxml" would be created in the build directory
               ;; and never reach the output.
               (symlink executable (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
4744
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                           version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete the copy of Boost shipped in the tarball.
        '(begin
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  Use this phase to remove the
         ;; bundled samtools library from the prerequisites of "all".
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               ;; Install every file whose name matches "rsem-.*", plus the
               ;; Perl helper module.
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         (add-after 'install 'wrap-program
           ;; Wrap the listed programs so that the installed module
           ;; directory is prepended to PERL5LIB.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl-modules (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/bin/" prog)
                             `("PERL5LIB" ":" prefix (,perl-modules))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
4820
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The tarball bundles pysam; delete it.
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; Drop the dependency on the outdated "distribute" module.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Mark pysam as available so the bundled copy is not used.
             (("^have_pysam = False") "have_pysam = True"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
4863
(define-public seek
  ;; There are no release tarballs. According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source (origin
                (method hg-fetch)
                (uri (hg-reference
                      (url "https://bitbucket.org/libsleipnir/sleipnir")
                      (changeset changeset)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; DIRS lists the sub-directories of "tools/" that are built and
         ;; installed in addition to the top-level build.
         (let ((dirs '("SeekMiner"
                       "SeekEvaluator"
                       "SeekPrep"
                       "Distancer"
                       "Data2DB"
                       "PCL2Bin")))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 (invoke "bash" "gen_auto")))
             ;; Run "make" with the package's make flags in each tool
             ;; directory.  'invoke' raises an error naming the failed
             ;; command, so the first failure aborts the build.
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               (apply invoke "make" make-flags)))
                           dirs)
                 #t))
             ;; Likewise run "make ... install" in each tool directory.
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               (apply invoke "make"
                                      (append make-flags
                                              (list "install")))))
                           dirs)
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
4933
(define-public samtools
  (package
    (name "samtools")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "05myg7bs90i68qbqab9cdg9rqj2xh39azibrx82ipzc5kcfvqhn9"))))
    (build-system gnu-build-system)
    (arguments
     ;; (ice-9 ftw) provides 'scandir' and (ice-9 regex) provides
     ;; 'string-match', both used by the 'install-headers phase.
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags (list "--with-ncurses" "--with-htslib=system")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; test/test.pl refers to /bin/bash; use the bash found on
             ;; PATH instead.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         ;; Also install the static library.
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out") "/lib")))
               (install-file "libbam.a" libdir)
               #t)))
         ;; Install every header of the top-level source directory under
         ;; "include/samtools/".
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((include (string-append (assoc-ref outputs "out")
                                           "/include/samtools/"))
                   (header? (lambda (name) (string-match "\\.h$" name))))
               (for-each (lambda (header)
                           (install-file header include))
                         (scandir "." header?))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
4991
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and it is still
  ;; used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     ;; Start from the arguments of 'samtools' and adjust them.
     `(#:tests? #f                      ;no "check" target
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ;; Prepend the curses library setting to the inherited flags.
           ((#:make-flags flags)
            `(cons "LIBCURSES=-lncurses" ,flags))
           ((#:phases phases)
            `(modify-phases ,phases
               ;; Install the "samtools" program by hand.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let ((bindir (string-append (assoc-ref outputs "out")
                                                "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t)))
               ;; Tests are disabled above and no configure step is run
               ;; for this release.
               (delete 'patch-tests)
               (delete 'configure))))))))
5022
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; Upstream provides neither release tarballs nor tags, so the
         ;; source is a Git checkout pinned to COMMIT.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, just move into the "src" directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy the "bin" directory, a sibling of "src", to the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5069
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.3.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so call it by hand.  'invoke'
               ;; raises an error that names the command when it fails.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)))))
         ;; Build from the "ngs-sdk" sub-directory of the source tree.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t)))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;; unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
5117
;; Java bindings built from the same source as 'ngs-sdk'; only the
;; sub-directory entered after unpacking and the inputs differ.
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Take the arguments of 'ngs-sdk', put an explicit module list in
     ;; front of them, and rewrite the inherited phases.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           ;; Enter "ngs-java" instead of "ngs-sdk".
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
5135
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Add "-msse2" to the compiler flags.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so call it by hand.  'invoke'
               ;; raises an error that names the command when it fails.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Install interface libraries. On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir (string-append out "/ilib"))
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 (string-append out "/ilib"))
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include")))
             #t))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir target)
               (install-file "libs/kfg/default.kfg" target)
               (install-file "libs/kfg/certs.kfg" target))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5244
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       ;; The source is a zip archive; "unzip" is among the native inputs.
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list
        ;; Point the Makefile at the LAPACK shared library from the inputs.
        (string-append "LIB_LAPACK="
                       (assoc-ref %build-inputs "lapack")
                       "/lib/liblapack.so")
        "WITH_LAPACK=1"
        "FORCE_DYNAMIC=1"
        ;; An empty WITH_WEBCHECK disables phoning home.
        "WITH_WEBCHECK=")
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Install the "plink" binary by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5297
;; plink-ng inherits from plink every field it does not override below
;; (synopsis, description, native inputs, ...).
(define-public plink-ng
  (package (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/chrchang/plink-ng/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "09ixrds009aczjswxr2alcb774mksq5g0v78dgjjn1h4dky0kf9a"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "BLASFLAGS=-llapack -lopenblas"
             "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
             "ZLIB=-lz"
             ;; Build with "Makefile.std" instead of the default makefile.
             "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         ;; The build is run from the "1.9" sub-directory of the checkout.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (delete 'configure)            ;no "configure" script
         (replace 'install
           ;; Copy the "plink" executable into the "bin" directory of the
           ;; "out" output.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
5334
;; Smithlab CPP is packaged from a fixed Git commit; the version string is
;; "0.<revision>.<first seven characters of the commit>".
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Rewrite references to "sam.h" so that they point at the header
           ;; below the "samtools/" include directory.
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           ;; Install the object files into "lib" and the C++ headers into
           ;; "include/smithlab-cpp".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out         (assoc-ref outputs "out"))
                      (libdir      (string-append out "/lib"))
                      (include-dir (string-append out "/include/smithlab-cpp")))
                 (mkdir-p libdir)
                 (mkdir-p include-dir)
                 (for-each (lambda (object)
                             (install-file object libdir))
                           (find-files "." "\\.o$"))
                 (for-each (lambda (header)
                             (install-file header include-dir))
                           (find-files "." "\\.hpp$")))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
5387
;; preseq is built against the packaged samtools and smithlab-cpp; the
;; bundled samtools copy is deleted from the source.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/"
                           "preseq/archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled samtools.
           (delete-file-recursively "samtools")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       (let ((out          (assoc-ref %outputs "out"))
             (samtools     (assoc-ref %build-inputs "samtools"))
             (smithlab-cpp (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab-cpp "/lib")
               "PROGS=preseq"
               ;; $(SMITHLAB_CPP) and $(SAMTOOLS_DIR) are left for "make" to
               ;; expand.
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
5437
;; Screed, fetched from PyPI.  The test suite is run with nose, skipping
;; tests that carry the "known_failing" attribute.
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Append the current directory to PYTHONPATH before running
             ;; the tests.
             (setenv "PYTHONPATH"
                     (string-append (getenv "PYTHONPATH") ":."))
             ;; 'invoke' raises an error naming the command when it exits
             ;; with a non-zero status, and returns #t otherwise.
             (invoke "nosetests" "--attr" "!known_failing"))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
5468
;; Python 2 variant of python-screed, derived with 'package-with-python2'.
(define-public python2-screed
  (package-with-python2 python-screed))
5471
;; The SRA Toolkit, built against the installed ncbi-vdb package rather than
;; against an ncbi-vdb build tree.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.8.2-1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       ;; Point the build at the configuration files and libraries installed
       ;; by ncbi-vdb.  The library directory is "lib64" when the system
       ;; string starts with "x86_64" and "lib32" otherwise.
       (list (string-append "DEFAULT_CRT="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/certs.kfg")
             (string-append "DEFAULT_KFG="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/default.kfg")
             (string-append "VDB_LIBDIR="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            ,(if (string-prefix? "x86_64"
                                                 (or (%current-target-system)
                                                     (%current-system)))
                                 "/lib64"
                                 "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources and
             ;; raw build output of ncbi-vdb, including files that are not
             ;; installed.  Since we are building against an installed version of
             ;; ncbi-vdb, the following modifications are needed.
             ;;
             ;; NOTE(review): the first pattern is anchored at the start of the
             ;; line, so its leading whitespace must match konfigure.perl
             ;; exactly -- confirm against the upstream file.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install', so it is called by hand.  'invoke'
             ;; raises an error naming the command if it fails and returns
             ;; #t otherwise.
             (invoke "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     ;; (string-append "--with-xml2-prefix="
                     ;;                (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5")))))
         ;; This version of sra-tools fails to build with glibc because of a
         ;; naming conflict.  glibc-2.25/include/bits/mathcalls.h already
         ;; contains a definition of "canonicalize", so we rename it.
         ;;
         ;; See upstream bug report:
         ;; https://github.com/ncbi/sra-tools/issues/67
         (add-after 'unpack 'patch-away-glibc-conflict
           (lambda _
             (substitute* "tools/bam-loader/bam.c"
               (("canonicalize\\(" line)
                (string-append "sra_tools_" line)))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
5582
;; SeqAn is installed by unpacking the release tarball and copying two
;; directories; nothing is compiled.
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://packages.seqan.de/seqan-library/"
                           "seqan-library-" version ".tar.bz2"))
       (sha256
        (base32
         "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar-dir   (assoc-ref %build-inputs "tar"))
               (bzip2-dir (assoc-ref %build-inputs "bzip2"))
               (tarball   (assoc-ref %build-inputs "source"))
               (out       (assoc-ref %outputs "out"))
               (doc       (assoc-ref %outputs "doc")))
           ;; PATH is set by hand to the "bin" directories of tar and bzip2.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip2-dir "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", the "share" directory goes to "doc".
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
5626
;; Seqmagick, fetched from PyPI and built with the default phases of the
;; Python build system.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32
         "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (inputs
     `(("python-biopython" ,python-biopython)))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
5652
;; Seqtk is built from a plain Makefile; the "check" and "install" phases
;; are replaced because upstream provides neither tests nor an install
;; target usable here.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/seqtk/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove extraneous header files, as is done in the seqtk
           ;; master branch.
           (for-each delete-file
                     (list "ksort.h" "kstring.h" "kvec.h"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.  'invoke'
           ;; raises an error if the command exits with a non-zero status.
           (lambda _
             (invoke "./seqtk" "seq")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               ;; Return #t explicitly; 'install-file' has no useful return
               ;; value.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
5695
;; SNAP is built from its Makefile; tests are run through the "unit_tests"
;; executable produced by the build.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/amplab/snap/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; 'invoke' raises an error naming the command if the tests fail,
           ;; and returns #t otherwise.
           (lambda _
             (invoke "./unit_tests")))
         (replace 'install
           ;; Install both executables into the "bin" directory of "out".
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    (native-inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
5736
;; SortMeRNA has two outputs: "out" receives the executables and "db" the
;; FASTA sequence databases shipped with the source.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (db     (assoc-ref outputs "db"))
                    (bindir (string-append out "/bin"))
                    (dbdir  (string-append
                             db "/share/sortmerna/rRNA_databases")))
               ;; The two executables go to "out" ...
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               ;; ... and every matching database file goes to "db".
               (for-each (lambda (database)
                           (install-file database dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
5783
;; STAR is built from the "source" sub-directory against the packaged
;; htslib; pre-built binaries and the bundled htslib are removed from the
;; source.
(define-public star
  (package
    (name "star")
    (version "2.6.0a")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0yci4ymy4407kjh0lqp021nzccp6r31wgrkixkmix5p130cxvc56"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Call "rm" by name rather than through the absolute "/bin/rm".
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Remove pre-built binaries and bundled htslib sources.
           (delete-file-recursively "bin/MacOSX_x86_64")
           (delete-file-recursively "bin/Linux_x86_64")
           (delete-file-recursively "bin/Linux_x86_64_static")
           (delete-file-recursively "source/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             ;; Replace the value of COMPILATION_TIME_PLACE with a fixed
             ;; string.
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             ;; 'substitute*' has no useful return value; phases are
             ;; expected to return #t on success.
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the Depend.list target.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Include the htslib headers with angle brackets instead of
             ;; from the bundled "htslib/" directory.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
5862
;; Subread is built from "src" with Makefile.Linux; the programs the build
;; leaves in "../bin" are copied to the "bin" directory of the output.
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/subread/subread-"
                           version "/subread-" version "-source.tar.gz"))
       (sha256
        (base32
         "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               ;; The generic flags are always used; the x86_64-only flags
               ;; are appended when the system string starts with "x86_64".
               (string-append "CCFLAGS="
                              (string-join
                               (if (string-prefix? "x86_64" system)
                                   (append flags flags64)
                                   flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin)
               ;; 'copy-recursively' has no useful return value; phases are
               ;; expected to return #t on success.
               #t)))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
5916
;; StringTie is built against the packaged samtools 0.1; the bundled
;; "samtools-0.1.18" directory is deleted from the source.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32
         "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ;no configure script
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Drop the "libbam.a" prerequisite of the "stringtie" target.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the "samtools/" include
             ;; directory.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (replace 'install
           ;; Copy the "stringtie" executable into the "bin" directory of
           ;; the "out" output.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "stringtie" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
5978
;; Taxtastic is built with Python 2; its tests are run through unittest
;; discovery.
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "taxtastic" version))
       (sha256
        (base32
         "03pysw79lsrvz4lwzis88j15067ffqbi4cid5pqhrlxmd6bh8rrk"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           ;; 'invoke' raises an error naming the command if the tests fail,
           ;; and returns #t otherwise.
           (lambda _
             (invoke "python" "-m" "unittest" "discover" "-v"))))))
    (propagated-inputs
     `(("python-sqlalchemy" ,python2-sqlalchemy)
       ("python-decorator" ,python2-decorator)
       ("python-biopython" ,python2-biopython)
       ("python-pandas" ,python2-pandas)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
6009
;; VCFtools, built from the upstream release tarball with explicit PREFIX
;; and MANDIR make variables.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.15")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32
         "1qw30c45wihgy632rbz4rh3njnwj4msj46l1rsgdhyg6bgypmr1i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
6045
;; Infernal, built with the default phases of the GNU build system.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32
         "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
6073
;; CENTIPEDE, fetched from R-Forge rather than through a CRAN or
;; Bioconductor URI helper.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32
         "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
6097
;; The vegan R package from CRAN; gfortran is a native input of the build.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.5-1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "vegan" version))
       (sha256
        (base32
         "0pynr02d1xngda6c3va8fc4nxpgfkawhzcnz1ws4dnarp9b1w90r"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ("r-knitr" ,r-knitr)
       ("r-lattice" ,r-lattice)
       ("r-mass" ,r-mass)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (license license:gpl2+)))
6127
;; annotate: Bioconductor package with annotation environments for
;; microarrays.
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.56.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "annotate" version))
       (sha256
        (base32 "0ybg9k1s289h15nj1kp9821i1rsk1gkn8i8blplmk7gsgpbw1f42"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (synopsis "Annotation for microarrays")
    (description
     "This package provides R environments for the annotation of
microarrays.")
    (home-page "https://bioconductor.org/packages/annotate")
    (license license:artistic2.0)))
6154
;; copynumber: Bioconductor package for segmenting copy number data.
(define-public r-copynumber
  (package
    (name "r-copynumber")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "copynumber" version))
       (sha256
        (base32 "01kcwzl485yjrkgyg8117b1il957ss0v6rq4bbxf4ksd5fzcjmyx"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-genomicranges" ,r-genomicranges)
       ("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "Segmentation of single- and multi-track copy number data")
    (description
     "This package segments single- and multi-track copy number data by a
penalized least squares regression method.")
    (home-page "https://bioconductor.org/packages/copynumber")
    (license license:artistic2.0)))
6177
;; geneplotter: Bioconductor plotting functions for genomic data.
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "geneplotter" version))
       (sha256
        (base32 "1z3g7frc1iviwrsv2dlm4nqvkc0685h4va0388yfxn102ln8wwma"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (home-page "https://bioconductor.org/packages/geneplotter")
    (license license:artistic2.0)))
6202
;; genefilter: Bioconductor functions for filtering genes.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.60.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genefilter" version))
       (sha256
        (base32 "173swlg6gj4kdllbqvyiw5dggbcxiwlwpqmllsv4dxzn7h25i3g7"))))
    (build-system r-build-system)
    ;; gfortran is only required while building.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-survival" ,r-survival)))
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (home-page "https://bioconductor.org/packages/genefilter")
    (license license:artistic2.0)))
6229
;; DESeq2: differential expression analysis from Bioconductor.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.18.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq2" version))
       (sha256
        (base32 "1iyimg1s0x5pdmvl8x08s8h0v019y0nhjzs50chagbpk2x91fsmv"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (home-page "https://bioconductor.org/packages/DESeq2")
    (license license:lgpl3+)))
6266
;; DEXSeq: differential exon usage analysis from Bioconductor.
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.24.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DEXSeq" version))
       (sha256
        (base32
         "1a80yv742fx5c7qav7imsdybphf0d5bixsqyf8w5zng7fk8j16d5"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties `((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (synopsis "Inference of differential exon usage in RNA-Seq")
    ;; The wording differs from the original in one place only: the plural
    ;; subject "functions" now takes the plural verb "allow".
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allow the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (license license:gpl3+)))
6309
;; AnnotationForge: Bioconductor code for building annotation packages.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationForge" version))
       (sha256
        (base32 "01vbrf76vqfvxh6vpfxkjwccxggnha3byqzj333glqz2b6kwx5q1"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (license license:artistic2.0)))
6339
;; RBGL: Bioconductor interface to the Boost graph algorithms.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RBGL" version))
       (sha256
        (base32 "18jad23i3899ypv4bg3l47cvvs3qnj1pqis2p9x0135yv5y6wnv7"))))
    ;; The upstream name is upper case.
    (properties '((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (home-page "https://www.bioconductor.org/packages/RBGL")
    (license license:artistic2.0)))
6360
;; GSEABase: Bioconductor data structures for gene set enrichment analysis.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.40.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GSEABase" version))
       (sha256
        (base32 "10cmjxahg2plwacfan6g0k8cwyzya96ypc7m1r79gwqkyykxw5fz"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (home-page "https://bioconductor.org/packages/GSEABase")
    (license license:artistic2.0)))
6387
;; Category: Bioconductor tools for category analysis.
(define-public r-category
  (package
    (name "r-category")
    (version "2.44.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Category" version))
       (sha256
        (base32 "0mkav04vbla0xfa0dssxdd0rjs589sxi83xklf5iq5hj3dm8y0i8"))))
    ;; The upstream name starts with a capital letter.
    (properties '((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (home-page "https://bioconductor.org/packages/Category")
    (license license:artistic2.0)))
6418
;; GOstats: Bioconductor tools for working with GO and microarray data.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.44.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GOstats" version))
       (sha256
        (base32 "04gqfdlx9fxf97qf0l28x4aaqvl10n6v58qiz5fiaw05sbj1pf1i"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (home-page "https://bioconductor.org/packages/GOstats")
    (license license:artistic2.0)))
6449
;; ShortRead: Bioconductor FASTQ input and manipulation tools.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.36.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ShortRead" version))
       (sha256
        (base32 "1cyv47632m9ljkxfsvnvmd19sb607ys5kz8fwh6v39dnw16g0a6m"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "ShortRead")))
    (build-system r-build-system)
    ;; zlib is a plain (non-propagated) input.
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (home-page "https://bioconductor.org/packages/ShortRead")
    (license license:artistic2.0)))
6491
;; systemPipeR: Bioconductor workflow and reporting environment for NGS.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "systemPipeR" version))
       (sha256
        (base32 "11mj8pjq5vj25768vmagpzv74fvi3p3kdk5zdlznqyiaggri04cv"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (home-page "https://github.com/tgirke/systemPipeR")
    (license license:artistic2.0)))
6537
;; groHMM: Bioconductor pipeline for GRO-seq data.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "groHMM" version))
       (sha256
        (base32 "0cjkj0ypyc4dfi9s8dh88kh6q4xlpnc0wal7njg4b4gqj0l2hva7"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (license license:gpl3+)))
6564
;; TxDb annotation data for the human hg19 genome ("knownGene" track).
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" and not under "bioc/",
       ;; so bioconductor-uri cannot be used; spell the URL out instead.
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/annotation/src/contrib"
                           "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     '((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is pointless.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (home-page
     "https://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (license license:artistic2.0)))
6596
;; SPARQL: CRAN client for SPARQL end-points.
(define-public r-sparql
  (package
    (name "r-sparql")
    (version "1.16")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "SPARQL" version))
       (sha256
        (base32 "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
    ;; The upstream name is upper case.
    (properties '((upstream-name . "SPARQL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (synopsis "SPARQL client for R")
    (description
     "This package provides an interface to use SPARQL to pose
SELECT or UPDATE queries to an end-point.")
    (home-page "https://cran.r-project.org/web/packages/SPARQL")
    ;; The DESCRIPTION file is the only place that names a license, and it
    ;; says GPL-3, so "or later" cannot be assumed.
    (license license:gpl3)))
6619
;; VSEARCH: sequence search tool, built from the GitHub release tarball with
;; the bundled cityhash sources removed in favour of the cityhash input.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       ;; Give the downloaded archive a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "15pbirgzhvflj4pi5n82vybbzjy9mlb0lv5l3qhrmdkfzpbyahw3"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources.  The vsearch source is
           ;; adjusted for this in the patch.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Generate the build system after unpacking.  'invoke' returns #t
         ;; on success and raises an error naming the failed command
         ;; otherwise, instead of silently yielding #f.
         (add-after 'unpack 'autogen
           (lambda _
             (invoke "autoreconf" "-vif"))))))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    ;; Needed by the 'autogen' phase only.
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    (home-page "https://github.com/torognes/vsearch")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
6670
;; ParDRe: MPI tool for removing duplicate reads, fetched from SourceForge.
(define-public pardre
  (package
    (name "pardre")
    ;; Upstream replaced the 1.1.5 tarball in place, hence the "-1" suffix;
    ;; the download URL below still uses the plain "1.1.5".
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           "1.1.5" ".tar.gz"))
       (sha256
        (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is dropped and the install phase is replaced
         ;; by a copy of the "ParDRe" file into the output's bin directory.
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "ParDRe"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
6711
;; bio-kseq: Ruby bindings for the kseq.h parser, taken from RubyGems.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments
     '(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
6739
;; bio-locus: genome location query tool, taken from RubyGems.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
6763
;; bio-blastxmlparser: BLAST XML parser and library, taken from RubyGems.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-blastxmlparser" version))
       (sha256
        (base32
         "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is listed as a native input, matching how the other Ruby
    ;; packages in this file (bio-locus, ruby-bio-kseq) declare it.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
6787
;; BioRuby: the "bio" gem from RubyGems, with hard-coded tool locations in
;; its test files replaced before the build.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; Substitute the locations that the 'which' procedure reports
             ;; for "sh", "ls" and "which".  The clauses are applied to each
             ;; line in the order written, as the separate passes were.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; "/bin/echo" occurs in both files.  The list is plain quoted
             ;; data: no comma between the file names.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
6830
;; ACSNMineR: CRAN package for gene set enrichment against ACSN maps.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32 "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
    (license license:gpl2+)))
6856
;; BiocGenerics: S4 generics shared by Bioconductor packages.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32 "03wxvhxyrhipbgcg83lqlfn7p9gbzzrnl48y0dq7303xgp232zai"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "BiocGenerics")))
    (build-system r-build-system)
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (home-page "https://bioconductor.org/packages/BiocGenerics")
    (license license:artistic2.0)))
6876
;; BiocInstaller: installer for Bioconductor packages.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32 "19fga27bv6q9v5mpil74y76lahmnwvpg2h33rdx1r79nvljkd19d"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "BiocInstaller")))
    (build-system r-build-system)
    (synopsis "Install Bioconductor packages")
    (description
     "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (home-page "https://bioconductor.org/packages/BiocInstaller")
    (license license:artistic2.0)))
6895
;; biocViews: helper for categorizing Bioconductor packages.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32 "09zyqj1kqc089lmh9sliy0acanx9zimcasvp71dsrg2bqm08r1md"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (synopsis "Bioconductor package categorization helper")
    (description
     "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (home-page "https://bioconductor.org/packages/biocViews")
    (license license:artistic2.0)))
6922
;; bookdown: CRAN package for authoring books with R Markdown.
(define-public r-bookdown
  (package
    (name "r-bookdown")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "bookdown" version))
       (sha256
        (base32 "1b3fw1f41zph5yw3kynb47aijq53vhaa6mnnvxly72zamyzdf95q"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-htmltools" ,r-htmltools)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-tinytex" ,r-tinytex)
       ("r-yaml" ,r-yaml)
       ("r-xfun" ,r-xfun)))
    (synopsis "Authoring books and technical documents with R markdown")
    (description
     "This package provides output formats and utilities for
authoring books and technical documents with R Markdown.")
    (home-page "https://github.com/rstudio/bookdown")
    (license license:gpl3)))
6946
;; BiocStyle: formatting styles for Bioconductor documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32 "03pp04pkcq99kdv2spzr995h2cxsza7l6w3d4gp4112m06prcybm"))))
    ;; The upstream name is capitalized differently from the Guix name.
    (properties '((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (synopsis "Bioconductor formatting styles")
    (description
     "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (license license:artistic2.0)))
6971
;; BiocCheck: Bioconductor-specific package checks.  On top of the regular R
;; build, two scripts from the installed package are linked into "bin".
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocCheck" version))
       (sha256
        (base32
         "1nzp8kgw13z9pgf885rplj6k37jcldfhbz0adqclxr2gq0yalmyx"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(package) from
         ;; within R, or by running R CMD BiocCheck on a package.  This
         ;; phase makes sure the latter works.  For this to work, the
         ;; BiocCheck script must be somewhere on the PATH (not the R bin
         ;; directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the file names are joined below, so the link targets
                    ;; do not contain "//".
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script into DEST-DIR under its own name.
               (for-each (lambda (script)
                           (symlink (string-append script-dir "/" script)
                                    (string-append dest-dir "/" script)))
                         '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7017
;; getopt from CRAN: command-line option processing for Rscript shebang
;; scripts.
(define-public r-getopt
  (package
    (name "r-getopt")
    (version "1.20.2")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "getopt" version))
              (sha256
               (base32
                "13p35lbpy7i578752fa71sbfvcsqw5qfa9p6kf8b5m3c5p9i4v1x"))))
    (build-system r-build-system)
    (home-page "https://github.com/trevorld/getopt")
    (synopsis "Command-line option processor for R")
    (description "This package is designed to be used with Rscript to write shebang
scripts that accept short and long options. Many users will prefer to
use the packages @code{optparse} or @code{argparse} which add extra
features like automatically generated help options and usage texts,
support for default values, positional argument support, etc.")
    (license license:gpl2+)))
7039
;; optparse from CRAN: command line parser for Rscript shebang scripts,
;; built on top of r-getopt.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.4.4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "optparse" version))
              (sha256
               (base32
                "1ff4wmsszrb3spwfp7ynfs8w11qpy1sdzfxm1wk8dqqvdwris7qb"))))
    (build-system r-build-system)
    (propagated-inputs `(("r-getopt" ,r-getopt)))
    (home-page "https://github.com/trevorld/optparse")
    (synopsis "Command line option parser")
    (description "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (license license:gpl2+)))
7062
;; DNAcopy from Bioconductor: circular binary segmentation of DNA copy
;; number data.
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DNAcopy" version))
       (sha256
        (base32 "127il5rlg1hzjlhwhs64x3nm18p00q1pd9ckb2b9ifl0rax95wai"))))
    (properties '((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    (inputs `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description
     "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
7084
;; S4Vectors from Bioconductor: the Vector and List virtual classes and
;; related generics.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "03s8vz33nl6mivjb7dbvj702dkypi340lji1sjban03fyyls0hw0"))))
    (properties '((upstream-name . "S4Vectors")))
    (build-system r-build-system)
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
7111
;; seqinr from CRAN: exploratory analysis and visualization of biological
;; sequence data.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.4-5")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqinr" version))
              (sha256
               (base32
                "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (home-page "http://seqinr.r-forge.r-project.org/")
    (synopsis "Biological sequences retrieval and analysis")
    (description "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (license license:gpl2+)))
7136
;; IRanges from Bioconductor: S4 classes for integer ranges, RLE vectors and
;; other sequentially organized data.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "1vqczb9wlxsmpwpqig6j1dmiblcfpq6mgnq8qwzcrvddm4cp47m5"))))
    (properties '((upstream-name . "IRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
7165
;; GenomeInfoDbData: lookup tables between NCBI taxonomy ID and species,
;; used by GenomeInfoDb.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri cannot be used for this tarball: it lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32 "0hipipvyvrh75n68hsjg35sxbcfzrghzxv547vnkk2f8ya99g01r"))))
    (properties '((upstream-name . "GenomeInfoDbData")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description
     "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
7188
;; GenomeInfoDb from Bioconductor: translation between chromosome sequence
;; naming conventions.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "1jhm0imkac4gvchbjxj408aakk39xdv2fyh818d3lk295bz6bnyp"))))
    (properties '((upstream-name . "GenomeInfoDb")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
7216
;; edgeR from Bioconductor: differential expression analysis of RNA-seq
;; expression profiles with biological replication.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.20.9")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "0y52snwbz37xzdd7gihdkqczbndlfzmmypv6hri3ymjyfmlx6qaw"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod))) ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The last sentence previously read "It be applied to ...", which was
    ;; missing its verb.
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can also be applied to differential signal
analysis of other types of genomic data that produce counts, including
ChIP-seq, SAGE and CAGE.")
    (license license:gpl2+)))
7244
;; VariantAnnotation from Bioconductor: annotation of genetic variants.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.24.5")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "07ywn3c4w83l3sr76d0z3b1nv9icgdh3phsjlc6cfx7i6nfmvxw2"))))
    (properties '((upstream-name . "VariantAnnotation")))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
7282
;; limma from Bioconductor: linear models for microarray and RNA-seq data.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.34.9")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "1y2fm61g5i0fn0j3l31xvwh9zww9bpkc4nwzb1d0yv1cag20jkdc"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
7301
;; XVector from Bioconductor: S4 classes for sequences stored "externally"
;; (behind an R external pointer, or on disk).
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "1i4i3kdxr78lr1kcxq657p11ybi7kq10c8kyaqyh6gfc8i9rhvmk"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata;
         ;; zlib is provided through the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    ;; Fixed spelling: "manpulation" -> "manipulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7337
;; GenomicRanges from Bioconductor: containers for storing and manipulating
;; genomic intervals and variables defined along a genome.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.30.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "07cszc9ri94nzk4dffwnsj247ih6pchnrzrvnb0q4dkk33gwy8n1"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The first sentence used to run two clauses together ("provides tools
    ;; to ... is playing a central role"); it is now a single well-formed
    ;; sentence with the same content.
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, a task that plays a central role when it
comes to analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7366
;; Biobase from Bioconductor: base functions needed by many other
;; Bioconductor packages.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "1cgm1ja1kp56zdlzyy9ggbkfn8r2vbsd4hncmz8g4hjd47fg18kg"))))
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
7388
;; AnnotationDbi from Bioconductor: user interface and database connection
;; code for annotation data packages using SQLite.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "1dh4qs1a757n640gs34lf6z2glc96nan86x0sqaw5csadl2rhnlc"))))
    (properties '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7415
;; biomaRt from Bioconductor: interface to databases implementing the
;; BioMart software suite.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.34.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "1zlgs2zg0lmnk572p55n7m34nkxka8w10x8f2ndssjkffl2csy79"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-httr" ,r-httr)
       ("r-progress" ,r-progress)
       ("r-rcurl" ,r-rcurl)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL as its first argument and the displayed
    ;; text as its second; the two were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7448
;; BiocParallel from Bioconductor: functions for parallel evaluation
;; tailored to Bioconductor objects.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "13ng3n2wsgl3fh0v6jnz3vg51k5c1sh44pqdvblcrcd1qyjmmqhd"))))
    (properties '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
7473
;; Biostrings from Bioconductor: string containers and matching algorithms
;; for biological sequences.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "0vg50qdlxqcm2d6axjnzg8wh8pr4c5gz03l8bdl0llmwzp0zclzk"))))
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
7499
;; Rsamtools from Bioconductor: interface to the samtools, bcftools and
;; tabix utilities.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "0pjny5fjvbnfdyhl3bwxin678sha2drvs00sivxh3l772cn6yams"))))
    (properties '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Strip the zlibbioc references from DESCRIPTION and NAMESPACE;
         ;; zlib is listed in the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page
     "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
7543
;; DelayedArray from Bioconductor: delayed operations on array-like objects.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "0s7h2giyvz04cg6248kbbzpwhxdrpnsvl2s8k5c8ricisd9aaz4b"))))
    (properties '((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
7573
;; SummarizedExperiment from Bioconductor: container holding one or more
;; assays, with rows as genomic ranges and columns as samples.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "19vlwnby83fqjrilsxvnvgz0gvby7mrxvlmx18nb3p1w591ddfjh"))))
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
7604
;; GenomicAlignments from Bioconductor: containers for storing and
;; manipulating short genomic alignments.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.14.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "1659nj1xps7vliy5955i51x6hvrf16n1z0dfh10mmpaaswn2d2mv"))))
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
7637
;; rtracklayer from Bioconductor: interface to genome browsers and their
;; annotation tracks.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.38.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "1khzfczm35k5lq9h0jlqrq01192spzjyh8s6is89spj006flwn4k"))))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Strip the zlibbioc references from DESCRIPTION and NAMESPACE;
         ;; zlib is listed in the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
7682
;; GenomicFeatures from Bioconductor: tools for making and manipulating
;; transcript centric annotations.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.30.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "010vn8hlwbnw12pd1d8pv6m12yp3xwx557gba5rbjq9p4qypnn3z"))))
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rmysql" ,r-rmysql)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
7724
;; GO.db: Bioconductor annotation package describing the Gene Ontology.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; Fetched from the "data/annotation" section of the Bioconductor
       ;; release tree.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "02d1mn1al3q7qvhx1ylrr3ar4w4iw0qyi5d89v2336rzwk9maq35"))))
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
7748
;; graph from Bioconductor: simple graph handling capabilities for R.
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "graph" version))
       (sha256
        (base32 "15aajjp8h2z14p80c8hyd4rrmr9vqsm7bvwb989jxjl4k6g52an1"))))
    (build-system r-build-system)
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
7767
;; topGO from Bioconductor: enrichment analysis for gene ontology terms.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "1cgz4knxr328xfqlhl6ypxl6x86rfrlqz748kn94ainxjzz55i6x"))))
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
7800
;; BSgenome from Bioconductor: infrastructure shared by Biostrings-based
;; genome data packages.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "1jbzq7lm2iajajn2bifxnkss0k9fdvgqr30mral17cbhp5f6w4lq"))))
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
7830
;; BSgenome.Hsapiens.1000genomes.hs37d5: full Homo sapiens genome sequences
;; from the 1000genomes phase2 reference (hs37d5).
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri cannot be used for this tarball: it lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.1000genomes.hs37d5_"
                           version ".tar.gz"))
       (sha256
        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    (properties
     '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it does not make sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (license license:artistic2.0)))
7861
;; Bioconductor package for imputing missing microarray expression values.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "0b8r4swvyx3cjcc2ky8yn0ncpzlbi1pgfsn3wpbjmhh7sqrffm2n"))))
    (build-system r-build-system)
    ;; NOTE(review): gfortran is presumably needed to compile bundled
    ;; Fortran sources -- confirm against the upstream tarball.
    (inputs `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
7881
;; Bioconductor package "seqPattern": plots of oligonucleotide patterns
;; and motif occurrences.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "1kcm5w83q7w0v0vs7nyp4gq5z86c6n6pqy9zmyyhxcrns7f597pm"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
7908
;; Bioconductor package for summarising, annotating and visualising
;; genomic intervals.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.11.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1d2g1v6xhrf3gm86pv8ln22df5g6v6k6i4i39v4j82zn4apany6v"))))
    (build-system r-build-system)
    ;; All R dependencies are propagated so that they end up in the
    ;; profile of whoever installs this package.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
7954
;; Experimental data sets that accompany the r-genomation package.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/experiment/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/experiment/src/contrib/"
                           "genomationData_" version ".tar.gz"))
       (sha256
        (base32 "0h7g5x3kyb50qlblz5hc85lfm6n6f5nb68i146way3ggs04sqvla"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it
    ;; doesn't make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
7982
;; Annotation data package "org.Hs.eg.db" (Human).
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Hs.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1v6wa5613cjq59xd7x1qz8lr9nb2abm9abl2cci1khrnrlpla927"))))
    (properties '((upstream-name . "org.Hs.eg.db")))
    (build-system r-build-system)
    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome wide annotation for Human")
    (description
     "This package contains genome-wide annotations for Human, primarily based
on mapping using Entrez Gene identifiers.")
    (license license:artistic2.0)))
8008
;; Annotation data package "org.Ce.eg.db" (Worm).
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Ce.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "02ggchixlmzywhsbr0h2ms4dravv7m5964cjxqcjxqs16vjwlbk9"))))
    (properties '((upstream-name . "org.Ce.eg.db")))
    (build-system r-build-system)
    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Ce.eg.db/")
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (license license:artistic2.0)))
8034
;; Annotation data package "org.Dm.eg.db" (Fly).
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Dm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "033qak1d3wwz17va0bh8z8p8arx0aw2va6gm1qfwsvdkj9cd9d7d"))))
    (properties '((upstream-name . "org.Dm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Dm.eg.db/")
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (license license:artistic2.0)))
8060
;; Annotation data package "org.Mm.eg.db" (Mouse).
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Mm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "11q21p3ki4bn4hb3aix0g775l45l66jmas6m94nfhqqnpjhv4d6g"))))
    (properties '((upstream-name . "org.Mm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Mm.eg.db/")
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (license license:artistic2.0)))
8086
;; Bioconductor package "seqLogo": sequence logos for DNA motifs.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "1ql4q4vx0j61a893dqc3c8zxmgs8sqhy3j1qhyfdvbd01vw9w1kq"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties
     '((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8107
;; BSgenome data package for the UCSC hg19 human genome.
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.UCSC.hg19_"
                           version ".tar.gz"))
       (sha256
        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (properties '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    (build-system r-build-system)
    ;; Substitutes are switched off: the package is little more than one
    ;; very large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (license license:artistic2.0)))
8138
;; BSgenome data package for the UCSC mm9 mouse genome.
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm9_"
                           version ".tar.gz"))
       (sha256
        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    (build-system r-build-system)
    ;; Substitutes are switched off: the package is little more than one
    ;; very large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (license license:artistic2.0)))
8169
;; BSgenome data package for the UCSC mm10 mouse genome.
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm10_"
                           version ".tar.gz"))
       (sha256
        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    (build-system r-build-system)
    ;; Substitutes are switched off: the package is little more than one
    ;; very large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (license license:artistic2.0)))
8201
;; TxDb annotation data package for the mm10 knownGene track.
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
    (properties
     '((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    (build-system r-build-system)
    ;; Substitutes are switched off: the package is little more than one
    ;; very large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (license license:artistic2.0)))
8236
;; BSgenome data package for the UCSC ce6 worm genome.
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce6_"
                           version ".tar.gz"))
       (sha256
        (base32 "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    (build-system r-build-system)
    ;; Substitutes are switched off: the package is little more than one
    ;; very large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
8268
;; BSgenome data package for the UCSC ce10 worm genome.
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce10_"
                           version ".tar.gz"))
       (sha256
        (base32 "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    (build-system r-build-system)
    ;; Substitutes are switched off: the package is little more than one
    ;; very large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
8300
;; BSgenome data package for the UCSC dm3 fly genome.
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published in the "data/annotation/" tree rather
       ;; than "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm3_"
                           version ".tar.gz"))
       (sha256
        (base32 "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (properties '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    (build-system r-build-system)
    ;; Substitutes are switched off: the package is little more than one
    ;; very large data file.
    (arguments '(#:substitutable? #f))
    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (license license:artistic2.0)))
8332
;; Bioconductor package "motifRG": discriminative motif discovery.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32
                "193zl2rlzwxv9p9q5i7rilj3w05ndqfyp9bdpvagp5s5cin4hf44"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties
     '((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8359
;; CRAN package "qtl" (R/qtl): analysis of QTL experiments.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.42-8")
    (source
     (origin
       (method url-fetch)
       ;; Use the cran-uri helper, as r-wgcna and r-gkmsvm do in this
       ;; file, instead of a hand-built mirror URL.
       ;; NOTE(review): cran-uri is expected to also cover the CRAN
       ;; "Archive/" location used once a release is superseded --
       ;; confirm against (guix build-system r).
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "1l528dwvfpdlr05imrrm4rq32axp6hld9nqm6mm43kn5n7z2f5k6"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
8384
;; Bioconductor package "zlibbioc": zlib built from bundled sources.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "1zr9hbh55hglfpy15cpxwmddxblhyb0an15953l3rbhmlh2vpy92"))))
    (properties '((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description
     "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8403
;; R4RNA: RNA secondary structure analysis, fetched from e-rna.org.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              ;; The tarball comes from the project's own site.
              (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8426
;; Bioconductor package "Rhtslib": HTSlib packaged for use from R.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "1dw3p44bfr0m7w39ckc2k37sjcp1zz0b9g12mr8am15jaj6v0q2j"))))
    ;; The upstream name is capitalised, unlike the Guix package name.
    (properties
     '((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    (native-inputs `(("autoconf" ,autoconf)))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8453
;; Bioconductor package extracting read count signals from BAM files.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "15id6mkj95skb4kfafvfs2j7ylydal60c3pspcl7llhwpq6vcqvl"))))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8483
;; RCAS R library, built from a GitHub release archive.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.3.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/RCAS/archive/v"
                           version ".tar.gz"))
       ;; Store the download under a name that carries the package name
       ;; and version.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1qgc7vi6fpzl440yg7jhiycg5q336kd4pxqzx10yx2zcq3bq3msg"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("r-testthat" ,r-testthat)
       ;; knitr looks for "pandoc-citeproc" in PATH while the vignettes
       ;; are being built.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)))
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-biomart" ,r-biomart)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-topgo" ,r-topgo)
       ("r-dt" ,r-dt)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-motifrg" ,r-motifrg)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8531
;; Web front end for RCAS, built with the GNU build system.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "1p16frfys41a8yaa4gkm457nzkqhqs2pc3lkac0ds457w9w5j1gm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; After installation, wrap bin/rcas-web so that it runs with
         ;; the Guile load paths of guile-json and guile-redis and with
         ;; the R_LIBS_SITE value seen at build time.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((program (string-append (assoc-ref outputs "out")
                                            "/bin/rcas-web"))
                    (json-dir (assoc-ref inputs "guile-json"))
                    (redis-dir (assoc-ref inputs "guile-redis"))
                    ;; Colon-separated list of the two site directories.
                    (guile-path (string-append
                                 json-dir "/share/guile/site/2.2:"
                                 redis-dir "/share/guile/site/2.2")))
               (wrap-program program
                 (list "GUILE_LOAD_PATH" ":" '= (list guile-path))
                 (list "GUILE_LOAD_COMPILED_PATH" ":" '= (list guile-path))
                 (list "R_LIBS_SITE" ":" '=
                       (list (getenv "R_LIBS_SITE")))))
             #t)))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile-json)
       ("guile-redis" ,guile2.2-redis)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description
     "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8575
;; Bioconductor package "MutationalPatterns".
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.4.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "0ml4gsp5dfv23xqrknxh25q8q65hly1xb1215lcwyc8hj9z8f941"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The label here is shorter than the package variable's name.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description
     "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8612
;; CRAN package "WGCNA": weighted correlation network analysis.
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.63")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "WGCNA" version))
              (sha256
               (base32
                "1225dqm68bynkmklnsxdqdd3zqrpzbvqwyly8ibxmk75z33xz309"))))
    ;; The upstream name is upper-case, unlike the Guix package name.
    (properties
     '((upstream-name . "WGCNA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-doparallel" ,r-doparallel)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-fastcluster" ,r-fastcluster)
       ("r-foreach" ,r-foreach)
       ("r-go-db" ,r-go-db)
       ("r-hmisc" ,r-hmisc)
       ("r-impute" ,r-impute)
       ("r-rcpp" ,r-rcpp)
       ("r-robust" ,r-robust)
       ("r-survival" ,r-survival)
       ("r-matrixstats" ,r-matrixstats)
       ("r-preprocesscore" ,r-preprocesscore)))
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (license license:gpl2+)))
8651
;; ChIPKernels, built from a pinned commit of its Git repository.
(define-public r-chipkernels
  (let ((revision "1")
        (commit "c9cfcacb626b1221094fb3490ea7bac0fd625372"))
    (package
      (name "r-chipkernels")
      ;; The version is "1.1-", then the revision, a dot and the first
      ;; nine characters of the commit.
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ManuSetty/ChIPKernels.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description
       "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8687
;; SeqGL, built from a GitHub archive of the tagged version.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ManuSetty/SeqGL/"
                                  "archive/" version ".tar.gz"))
              ;; Store the download under a name that carries the
              ;; package name and version.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description
     "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8717
;; gkmSVM from CRAN.  The Guix name is lower-case, so the upstream spelling
;; is recorded in the `upstream-name' property.
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.79.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gkmSVM" version))
              (sha256
               (base32
                "04dakbgfvfalz4rm4fvvybp506dn5fbj5g86ybfhrc6wywjllsz3"))))
    (properties `((upstream-name . "gkmSVM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://cran.r-project.org/web/packages/gkmSVM")
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (license license:gpl2+)))
8750
;; tximport from Bioconductor; it declares no inputs of its own.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32
         "1gyqcm91hxg1kgjqcz2qw1n56yp9pymjzs50rwcpb2893dr8sp2h"))))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis
     "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8772
;; rhdf5 from Bioconductor.  The tarball ships a compressed copy of the HDF5
;; sources (src/hdf5source/hdf5small.tgz) that has to be unpacked and patched
;; before the regular R build runs.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5" version))
              (sha256
               (base32
                "145858qg1xan6imxcbprzq3yn3mdf532aahdr6cibvdjg47hs4c1"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-smallhdf5
           (lambda _
             ;; Only carry on when tar succeeded; previously its exit
             ;; status was discarded and the phase always reported success.
             (and (zero? (system* "tar" "-xzvf"
                                  "src/hdf5source/hdf5small.tgz"
                                  "-C" "src/"))
                  (begin
                    ;; Use `mv' from PATH rather than the hard-coded
                    ;; /bin/mv.
                    (substitute* "src/hdf5/configure"
                      (("/bin/mv") "mv"))
                    ;; Remove timestamp and host system information to make
                    ;; the build reproducible.
                    (substitute* "src/hdf5/src/libhdf5.settings.in"
                      (("Configured on: @CONFIG_DATE@")
                       "Configured on: Guix")
                      (("Uname information:.*")
                       "Uname information: Linux\n")
                      ;; Remove unnecessary store reference.
                      (("C Compiler:.*")
                       "C Compiler: GCC\n"))
                    #t)))))))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8820
;; AnnotationFilter from Bioconductor; the mixed-case upstream name is kept
;; in the `upstream-name' property.
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32
         "04zf864c1fvdlaay2r5cn30fc1n5i3czh31fs62qlrvs61wjiscs"))))
    (properties `((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8844
;; EMBOSS, built with the GNU build system.  The build scripts are patched
;; after unpacking and then regenerated with autoreconf, which is why the
;; autotools appear as native inputs.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32
         "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; CHECK_PNGDRIVER probes for libgd, libpng and zlib but
             ;; expects all three under one prefix.  Replace the macro call
             ;; with the settings it would have produced.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; There is no Internet connection at build time, so the
             ;; update lookup cannot work; drop it from the Makefile.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate the build system from the files patched above.
         (add-after 'disable-update-check 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8907
;; BITS, built from a pinned git commit with a plain Makefile (there is no
;; configure script to run and no test suite).
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; Upstream calls this 2.13.0, but no release archive has been
      ;; published for it yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         ;; Yields "NAME-VERSION-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32
           "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Drop every Makefile line ending in "_cuda" and strip the
               ;; line continuation that followed bits_test_intersections.
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy the contents of "bin" into the output's bin
               ;; directory.
               (let ((out (assoc-ref outputs "out")))
                 (copy-recursively "bin" (string-append out "/bin")))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8953
(define-public piranha
  ;; The latest version has no release tarball, and at the time of writing
  ;; the most recent commit was already more than a year old, so a git
  ;; commit is pinned instead.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         ;; Point configure at the bamtools headers and library.
         #:configure-flags
         (let ((bamtools (assoc-ref %build-inputs "bamtools")))
           (list (string-append "--with-bam_tools_headers="
                                bamtools "/include/bamtools")
                 (string-append "--with-bam_tools_library="
                                bamtools "/lib/bamtools")))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Put every file of the smithlab_cpp checkout into
               ;; src/smithlab_cpp/ of the piranha tree.
               (let ((sources (find-files (assoc-ref inputs "smithlab-cpp"))))
                 (for-each (lambda (source)
                             (install-file source "./src/smithlab_cpp/"))
                           sources))
               #t))
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy whatever is found under "bin" into the output.
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           (find-files "bin" ".*")))
               #t)))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; smithlab_cpp is pulled in as a source checkout at a fixed
         ;; commit, not as a package.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
9020
;; PePr from PyPI.  It is built with Python 2 only and ships no tests.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Let the shared helper build the PyPI source URL, as the
              ;; other PyPI packages in this file do, rather than spelling
              ;; it out by hand.
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
9049
;; filevercmp, fetched as the GitHub archive of a pinned commit.  The build
;; has no configure step and no tests, and the single binary is installed by
;; hand.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Report success explicitly, like the other custom phases
                 ;; in this file, instead of returning whatever
                 ;; `install-file' happens to return.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
9077
;; MultiQC from PyPI, built with the Python build system.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32
                "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime, hence it is
       ;; propagated rather than being a build-only input.
       ("python-nose" ,python-nose)))
    (home-page "http://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9110
;; chipseq from Bioconductor.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "1ymcq77krwjzrkzzcw7i9909cmkqa7c0675z9wzvrrk81hgdssfq"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9136
;; CopyhelpeR is downloaded from the Bioconductor "data/experiment" area,
;; so its URL is assembled explicitly.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://bioconductor.org/packages/release/"
                                  "data/experiment/src/contrib/CopyhelpeR_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9162
;; CopywriteR from Bioconductor; it propagates r-copyhelper among its
;; inputs.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "17fy2lc5yf3nh6v077kv87h53n263hqz2540lzrl0vjiqrl2plca"))))
    (properties `((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9202
;; methylKit from Bioconductor.  zlib is a plain input in addition to the
;; propagated R packages.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32
         "1k0nfn9318sgwm4z963bhnbp4c3zv85v3f9886vc5hgaisr0yvai"))))
    (properties `((upstream-name . "methylKit")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9248
;; sva from Bioconductor.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "sva" version))
              (sha256
               (base32
                "0q5xb68wfcnchy8rkv5ma67pmz1i91lsnvmwmj8f1c3w4xan3pgw"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9278
;; seqminer from CRAN; zlib is its only declared input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "6.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqminer" version))
              (sha256
               (base32
                "057j1l6dip35l1aivilapl2zv9db677b3di2pb3sfgq2sxg0ps3l"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Upstream accepts any version of the GPL.
    (license (list license:gpl2+ license:gpl3+))))
9300
;; RareMETALS2 is fetched from the genome.sph.umich.edu wiki, so its URL is
;; written out by hand.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                  "b/b7/RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9332
;; MALDIquant from CRAN; it declares no inputs of its own.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.17")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "MALDIquant" version))
              (sha256
               (base32
                "047s6007ydc38x8wm027mlb4mngz15n0d4238fr8h43wyll5zy0z"))))
    (properties `((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
9358
;; ProtGenerics from Bioconductor; it declares no inputs of its own.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "16ijp50448wnabp43klx943rhdvh7x45hvy7cnpq1s4dckxhhyni"))))
    (properties `((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9378
;; mzR from Bioconductor.  The bundled Boost sources are deleted from the
;; origin and the build is redirected to the `boost' input instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzR" version))
              (sha256
               (base32
                "1x3gp30sfxz2v3k3swih9kff9b2rvk7hzhnlkp6ywlnn2wgb0q8c"))
              (modules '((guix build utils)))
              ;; Remove the bundled copy of Boost.
              (snippet
               '(begin
                  (delete-file-recursively "src/boost")
                  #t))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             ;; Drop the references to the bundled Boost library sources
             ;; and add an ARCH_LIBS line after "ARCH_OBJS=" that lists the
             ;; Boost libraries to link against.
             (substitute* "src/Makevars"
               (("\\./boost/libs.*") "")
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nARCH_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(("boost" ,boost) ; use this instead of the bundled boost sources
       ("netcdf" ,netcdf)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9428
;; affyio from Bioconductor; it propagates r-zlibbioc and also takes zlib
;; as a plain input.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.48.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "1pzzp3d3dbmyf34gvivfiprkpscn36rgvhrq853a1d3avcwr5ak9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9452
;; affy from Bioconductor.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "0jmbkimma5ffsdkk3xp03g4lpz84gd95nkqakif2nqq6wmx0syrj"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9478
;; vsn from Bioconductor.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.46.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "vsn" version))
              (sha256
               (base32
                "18y62phzirj75gg6v5l41jwybmk23ia6w7qhch0kxc4bl2rysw6j"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9512
;; mzID from Bioconductor.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzID" version))
              (sha256
               (base32
                "0yk70dka56zd8w62f03ggx3mandj91gfa767h9ajj0sd3mjmfqb9"))))
    (properties `((upstream-name . "mzID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9542
;; pcaMethods from Bioconductor.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.70.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pcaMethods" version))
              (sha256
               (base32
                "0ii235g0x0492kh8cfrf28ni0b6vd6fh7kizkqmczzqggd6b1bk8"))))
    (properties `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9572
;; Bioconductor package "MSnbase": base functions and classes for
;; mass-spectrometry-based proteomics.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.4.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnbase" version))
              (sha256
               (base32
                "1ig64bf881p118dwqfr0ry41m7yhnyv165smv8fdwfv7sb6sagif"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9613
;; Bioconductor package "MSnID": utilities for handling LC-MSn proteomics
;; identifications.  The `upstream-name' property records the mixed-case
;; upstream spelling.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32
         "1zw508kk4f8brg69674wp18gqkpx2kpya5f6x9cl3qng7v4h5pxx"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    ;; Grammar fix: "assesses ... and optimizes" (the verbs share the
    ;; singular subject "it").
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9652
;; CRAN package "Seurat": an R toolkit for single cell genomics.  The
;; source ships a compiled jar, which is removed by the origin snippet and
;; rebuilt from the bundled Java sources in the `build-jar' phase.
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "2.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "Seurat" version))
       (sha256
        (base32
         "0kp3lw1s896zkjd6x2wp8qcg7wnm5b40g8vihps13f1m7j4nx7r0"))
       ;; Remove the jar that upstream ships pre-built.
       (snippet
        '(begin
           (delete-file "inst/java/ModularityOptimizer.jar")
           #t))))
    (properties '((upstream-name . "Seurat")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Compile every .java file below "java" and pack the classes into
         ;; the jar at the path the snippet deleted it from.
         (add-after 'unpack 'build-jar
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((class-dir "tmp-classes"))
               (setenv "JAVA_HOME" (assoc-ref inputs "jdk"))
               (mkdir class-dir)
               ;; Manifest naming the jar's entry point.
               (with-output-to-file "manifest"
                 (lambda _
                   (display
                    "Manifest-Version: 1.0\nMain-Class: ModularityOptimizer\n")))
               ;; The jar is only created when compilation succeeded; the
               ;; phase result is the conjunction of both exit checks.
               (and (zero? (apply system* "javac" "-d" class-dir
                                  (find-files "java" "\\.java$")))
                    (zero? (system* "jar"
                                    "-cmf" "manifest"
                                    "inst/java/ModularityOptimizer.jar"
                                    "-C" class-dir ".")))))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-caret" ,r-caret)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-diffusionmap" ,r-diffusionmap)
       ("r-dosnow" ,r-dosnow)
       ("r-dplyr" ,r-dplyr)
       ("r-dtw" ,r-dtw)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-fnn" ,r-fnn)
       ("r-foreach" ,r-foreach)
       ("r-fpc" ,r-fpc)
       ("r-gdata" ,r-gdata)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggridges" ,r-ggridges)
       ("r-gplots" ,r-gplots)
       ("r-gridextra" ,r-gridextra)
       ("r-hmisc" ,r-hmisc)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-lars" ,r-lars)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-metap" ,r-metap)
       ("r-mixtools" ,r-mixtools)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-ranger" ,r-ranger)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reshape2" ,r-reshape2)
       ("r-rocr" ,r-rocr)
       ("r-rtsne" ,r-rtsne)
       ("r-sdmtools" ,r-sdmtools)
       ("r-stringr" ,r-stringr)
       ("r-tclust" ,r-tclust)
       ("r-tidyr" ,r-tidyr)
       ("r-tsne" ,r-tsne)
       ("r-vgam" ,r-vgam)))
    (home-page "http://www.satijalab.org/seurat")
    (synopsis "Seurat is an R toolkit for single cell genomics")
    (description
     "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
9745
;; Bioconductor package "aroma.light": normalization and visualization
;; methods for microarray data.  The dot in the upstream name becomes a
;; dash in the Guix name, so the original is kept in `upstream-name'.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "aroma.light" version))
              (sha256
               (base32
                "0crnk6851jwypqr5l5jcbbay0vi5vvdjyisaf6z2d69c39wmr6sc"))))
    (properties '((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9772
;; Bioconductor package "DESeq": differential gene expression analysis on
;; count data.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq" version))
              (sha256
               (base32
                "0mn5w3cy16iwwk8zxs7za6aa6cnrca75z0g45zd5zh1py5d7nfv9"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www-huber.embl.de/users/anders/DESeq")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9803
;; Bioconductor package "EDASeq": exploratory data analysis and
;; normalization for RNA-Seq read data.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "EDASeq" version))
              (sha256
               (base32
                "07zm89zcivyn2261aq9grqmly8ji482kr9h9dyfknfdfrpv7jpwv"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9841
;; Bioconductor package "interactiveDisplayBase": base methods for
;; Shiny-based displays of Bioconductor objects.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "interactiveDisplayBase" version))
              (sha256
               (base32
                "01yb945jqqimwjgriza6yy4dnp303cdirxrhl4hjyprfdlmnz5p5"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9865
;; Bioconductor package "AnnotationHub": a client for the AnnotationHub
;; web resource.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.10.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationHub" version))
              (sha256
               (base32
                "14v8g44a6zg9j2rwn9x9y8509k0wr2cw8yccliz24glplb40wva4"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-curl" ,r-curl)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9900
;; Bioconductor package "fastseg": a segmentation algorithm for genetic
;; sequencing and microarray data.  The upstream name is already lower
;; case, so no `upstream-name' property is set.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "fastseg" version))
              (sha256
               (base32
                "0dd7nr3klwz9ailwshnbynhd62lwb8zbbpj6jf3igpb94yi6x2jp"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9930
;; Bioconductor package "KEGGREST": client-side access to the KEGG REST
;; server.  The `upstream-name' property records the upper-case upstream
;; spelling.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.18.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "02gwmm79djj55a90dzc80hlgwc6bafl7xd7fnx2q59pk945k3z9c"))))
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; The description previously read "This package provides a package that
    ;; provides ..."; the duplicated clause has been removed.
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
9954
;; Bioconductor package "gage": generally applicable gene-set enrichment
;; for pathway analysis.  The upstream name is already lower case, so no
;; `upstream-name' property is set.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.28.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32
         "0h0mlhns9j7cpfksvdlvx9jb7szm3r1dwqb3s4s8p8hmkb9byyii"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page "http://www.biomedcentral.com/1471-2105/10/161")
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    ;; Typo fix in the last sentence: "funtions" -> "functions".
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
9983
;; Bioconductor package "GenomicFiles": infrastructure for parallel
;; computations distributed by file or by range.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicFiles" version))
              (sha256
               (base32
                "0r0wmrs5jycf1kckhnc2sgjmp336srlcjdkpbb1ymm7kazdd0s9n"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
10016
;; Bioconductor package "ComplexHeatmap": arranges multiple heatmaps with
;; self-defined annotation graphics.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "1.17.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ComplexHeatmap" version))
              (sha256
               (base32
                "1x6kp55iqqsd8bhdl3qch95nfiy2y46ldbbsx1sj1v8f0b0ywwcy"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10046
;; Bioconductor package "DirichletMultinomial": Dirichlet-multinomial
;; mixture models for microbiome data.  Besides its R dependencies it
;; takes the GSL library as a regular (non-propagated) input.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DirichletMultinomial" version))
              (sha256
               (base32
                "1c4s6x0qm20556grcd1xys9kkpnlzpasaai474malwcg6qvgi4x1"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs
     `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10075
;; Bioconductor package "ensembldb": utilities to create and use
;; Ensembl-based annotation databases.  The upstream name is already lower
;; case, so no `upstream-name' property is set.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32
         "1yngndkf3588z91z0a2fvkg423p26ajm6xv1p27x0l9mzhhaqq3k"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; Grammar fixes: "annotations ... are" (number agreement) and "in
    ;; addition to retrieving" (gerund after "in addition to").
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotations for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data is
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
allowing to retrieve annotations for specific entries like genes encoded on a
chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
10119
;; Bioconductor package "OrganismDbi": a unified interface to several
;; annotation packages.  The `upstream-name' property records the
;; mixed-case upstream spelling.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32
         "0yxvhwn0m53wfwp0zi81x96argdf7cf1lpymc2as51apvfcnjdl8"))))
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; Grammar fix: "a select methods" -> "a select method".
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10151
;; Bioconductor package "biovizBase": basic graphic utilities for the
;; visualization of genomic data.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biovizBase" version))
              (sha256
               (base32
                "14l4vhj0a4ssr9m9zdzz3qpd4qw1mhgq5bmxq7jhrq3j9kmd6i2f"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10192
;; Bioconductor package "ggbio": visualization tools for genomic data.
;; The upstream name is already lower case, so no `upstream-name' property
;; is set.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.26.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ggbio" version))
              (sha256
               (base32
                "1xlmlngn27iwnr21s9di4059kav1a7c1sajx08wja8yn8f7j06hp"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10244
;; CRAN package "gProfileR": an interface to the g:Profiler toolkit.
;; Unlike its neighbours this one is fetched with `cran-uri'.
(define-public r-gprofiler
  (package
    (name "r-gprofiler")
    (version "0.6.6")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gProfileR" version))
              (sha256
               (base32
                "1n6cj12j102b4x9vhyl4dljp1i0r43p23cnhqbx4als2xfxdlqgi"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "gProfileR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-plyr" ,r-plyr)
       ("r-rcurl" ,r-rcurl)))
    (home-page "https://cran.r-project.org/web/packages/gProfileR/")
    (synopsis "Interface to the g:Profiler toolkit")
    (description
     "This package provides tools for functional enrichment analysis,
gene identifier conversion and mapping homologous genes across related
organisms via the @code{g:Profiler} toolkit.")
    (license license:gpl2+)))
10268
;; Bioconductor package "gQTLBase": infrastructure for storing and
;; querying QTL archives.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLBase" version))
              (sha256
               (base32
                "1756vfcj2dkkgcmfkkg7qdaig36dv9gfvpypn9rbrky56wm1p035"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10303
;; Bioconductor package "snpStats": classes and statistical methods for
;; SNP association studies.  zlib is a regular input; the R dependencies
;; are propagated.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "snpStats" version))
              (sha256
               (base32
                "1x9qwynh2hwl24vq02naf4mchpch7xi2pkdrlgw896k28kx0lvir"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "snpStats")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10330
;; Bioconductor annotation package "Homo.sapiens": provides the
;; Homo.sapiens object that ties several annotation packages together.
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       ;; The URL is spelled out by hand: `bioconductor-uri' cannot be used
       ;; because this tarball lives under "data/annotation/" rather than
       ;; "bioc/".
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "Homo.sapiens_"
                           version ".tar.gz"))
       (sha256
        (base32 "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    ;; The dot in the upstream name becomes a dash in the Guix name.
    (properties '((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene"
        ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10362
;; Bioconductor package "erma": a selection of elements of the
;; epigenomics road map.  The upstream name is already lower case, so no
;; `upstream-name' property is set.
(define-public r-erma
  (package
    (name "r-erma")
    (version "0.10.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "erma" version))
              (sha256
               (base32
                "1fi8nc6fbd7i4p9i9hli31xplmdpsxqcdrb8v3nf8fx9klllbdav"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10398
;; R package "ldblock", fetched from Bioconductor.  It depends on r-erma,
;; defined in this same file.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.8.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ldblock" version))
              (sha256
               (base32
                "1q8dz8wcq1r7kr635s9q21g36sxkdybk8khhpa4p57qv8r0gppl0"))))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-erma" ,r-erma)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-go-db" ,r-go-db)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10429
;; R package "gQTLstats", fetched from Bioconductor.  The upstream name is
;; mixed-case, so it is recorded in the package properties.
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.10.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLstats" version))
              (sha256
               (base32
                "0gvq1sf2zjbkk431x40z6wql3c1rpclnnwa2f1hvykb8mmw70kmq"))))
    (properties '((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-iranges" ,r-iranges)
       ("r-ldblock" ,r-ldblock)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10483
;; R package "Gviz", fetched from Bioconductor.  The upstream name is
;; capitalized, so it is recorded in the package properties.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.22.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Gviz" version))
       (sha256
        (base32
         "1grjzrjpzkw572pbvpsvdnfkfgwybl0cnjd7nnk2xdr26wnbsi9a"))))
    (properties `((upstream-name . "Gviz")))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    ;; The plural subject "analyses" takes the plural verb "require".
    (description
     "Genomic data analyses require integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10529
;; R package "gwascat", fetched from Bioconductor.  It depends on r-gqtlstats
;; and r-gviz, both defined in this same file.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gwascat" version))
              (sha256
               (base32
                "0n5x5i5v6a8wpn5mxmlpkl34b4kyypmymiwww6g61zch7xqrgywi"))))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbio" ,r-ggbio)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlstats" ,r-gqtlstats)
       ("r-graph" ,r-graph)
       ("r-gviz" ,r-gviz)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10569
;; R package "Sushi", fetched from Bioconductor.  The upstream name is
;; capitalized, so it is recorded in the package properties.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32
         "0axaqm480z8d0b2ldgxwm0swava1p4irc62bpl17p2k8k78g687g"))))
    (properties '((upstream-name . "Sushi")))
    (build-system r-build-system)
    ;; Both dependencies are R packages and are propagated.
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10591
;; R package "FitHiC", fetched from Bioconductor.  The upstream name is
;; mixed-case, so it is recorded in the package properties.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32
         "12ylhrppi051m7nqsgq95kzd9g9wmp34i0zzfi55cjqawlpx7c6n"))))
    (properties '((upstream-name . "FitHiC")))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10615
;; R package "HiTC", fetched from Bioconductor.  The upstream name is
;; mixed-case, so it is recorded in the package properties.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.22.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32
         "0da1jw9my2n2gihs31zyn14wwr23d8v2vij39ll7rm6fma3ydfbl"))))
    (properties '((upstream-name . "HiTC")))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10644
;; R package "qvalue", fetched from Bioconductor.
(define-public r-qvalue
  (package
    (name "r-qvalue")
    (version "2.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "qvalue" version))
       (sha256
        (base32
         "1rd9rnf16kh8wc076kahd9hsb9rfwsbzmz3kjmp0pj6rbiq0051i"))))
    (build-system r-build-system)
    ;; Both dependencies are R packages and are propagated.
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-reshape2" ,r-reshape2)))
    ;; Use HTTPS: GitHub redirects plain HTTP, and every other GitHub
    ;; home page in this file already uses the secure scheme.
    (home-page "https://github.com/jdstorey/qvalue")
    (synopsis "Q-value estimation for false discovery rate control")
    (description
     "This package takes a list of p-values resulting from the simultaneous
testing of many hypotheses and estimates their q-values and local @dfn{false
discovery rate} (FDR) values. The q-value of a test measures the proportion
of false positives incurred when that particular test is called significant.
The local FDR measures the posterior probability the null hypothesis is true
given the test's p-value. Various plots are automatically generated, allowing
one to make sensible significance cut-offs. The software can be applied to
problems in genomics, brain imaging, astrophysics, and data mining.")
    ;; Any version of the LGPL.
    (license license:lgpl3+)))
10673
;; R package "HDF5Array", fetched from Bioconductor.  The upstream name is
;; mixed-case, so it is recorded in the package properties.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "HDF5Array" version))
              (sha256
               (base32
                "0kcdza41saqv6vlpvqd841awbiwkg84lh0plx6c7fmfgbqv7a0jh"))))
    (properties '((upstream-name . "HDF5Array")))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-rhdf5" ,r-rhdf5)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description
     "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10699
;; R package "Rhdf5lib", fetched from Bioconductor.  A custom phase points
;; the package at the separately packaged hdf5 libraries and at an unpacked
;; copy of the hdf5 sources, and strips every reference to szip.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.0.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhdf5lib" version))
              (sha256
               (base32
                "0kkc4rprjbqn2wvbx4d49kk9l91vihccxbl4843qr1wqk6v33r1w"))))
    (properties '((upstream-name . "Rhdf5lib")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Look up the hdf5 input once; it is used for both static
             ;; library paths written to Makevars below.
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (for-each delete-file '("configure" "configure.ac"))
               ;; Do not make other packages link with the proprietary libsz.
               (substitute* "R/zzz.R"
                 (("'%s/libhdf5_cpp.a %s/libhdf5.a %s/libsz.a'")
                  "'%s/libhdf5_cpp.a %s/libhdf5.a %s/libhdf5.a'")
                 (("'%s/libhdf5.a %s/libsz.a'")
                  "'%s/libhdf5.a %s/libhdf5.a'"))
               (with-directory-excursion "src"
                 ;; Unpack the hdf5 sources and give the directory a
                 ;; version-independent name.
                 (invoke "tar" "xvf" (assoc-ref inputs "hdf5-source"))
                 (rename-file (string-append "hdf5-" ,(package-version hdf5))
                              "hdf5")
                 ;; Remove timestamp and host system information to make
                 ;; the build reproducible.
                 (substitute* "hdf5/src/libhdf5.settings.in"
                   (("Configured on: @CONFIG_DATE@")
                    "Configured on: Guix")
                   (("Uname information:.*")
                    "Uname information: Linux\n")
                   ;; Remove unnecessary store reference.
                   (("C Compiler:.*")
                    "C Compiler: GCC\n"))
                 (rename-file "Makevars.in" "Makevars")
                 (substitute* "Makevars"
                   (("HDF5_CXX_LIB=.*")
                    (string-append "HDF5_CXX_LIB="
                                   hdf5-dir "/lib/libhdf5_cpp.a\n"))
                   (("HDF5_LIB=.*")
                    (string-append "HDF5_LIB="
                                   hdf5-dir "/lib/libhdf5.a\n"))
                   (("HDF5_CXX_INCLUDE=.*") "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                   (("HDF5_INCLUDE=.*") "HDF5_INCLUDE=./hdf5/src\n")
                   ;; szip is non-free software
                   (("cp \\$\\{SZIP_LIB\\}.*") "")
                   (("PKG_LIBS = \\$\\{HDF5_LIB\\} \\$\\{SZIP_LIB\\}")
                    "PKG_LIBS = ${HDF5_LIB}\n")))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("hdf5" ,hdf5)))
    ;; The hdf5 source tarball is unpacked by the custom phase above.
    (native-inputs
     `(("hdf5-source" ,(package-source hdf5))))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description
     "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10765
;; R package "beachmat", fetched from Bioconductor.  Besides its propagated
;; R dependencies it takes hdf5 as a plain input.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "1.0.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "beachmat" version))
              (sha256
               (base32
                "0b6dzja5fbx4dawb7ixj67mlhw4fy62pfp20mfp918fy96zmdwqz"))))
    (build-system r-build-system)
    (inputs
     `(("hdf5" ,hdf5)))
    (propagated-inputs
     `(("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description
     "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10792
;; R package "SingleCellExperiment", fetched from Bioconductor.  The
;; upstream name is mixed-case, so it is recorded in the package properties.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.0.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "SingleCellExperiment" version))
              (sha256
               (base32
                "1r276i97w64a5vdlg6952gkj7bls909p42zl8fn8yz87cdwyaars"))))
    (properties '((upstream-name . "SingleCellExperiment")))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description
     "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10819
;; R package "scater", fetched from Bioconductor.  It depends on r-beachmat,
;; r-rhdf5lib and r-singlecellexperiment, all defined in this same file.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scater" version))
       (sha256
        (base32
         "0q3s96gf8saa1dq2fvmpl0jyj7bx3wrdfck3hanb8pxkcir2p7dn"))))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-edger" ,r-edger)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-rjson" ,r-rjson)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-shinydashboard" ,r-shinydashboard)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-tximport" ,r-tximport)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description
     "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
10863
;; R package "scran", fetched from Bioconductor.  It depends on r-scater,
;; defined in this same file.
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.6.9")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "scran" version))
              (sha256
               (base32
                "0cs64cnf0xjcgmawr210y99j3gxs6aqgh8081n9827kkqnx2y5dm"))))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-dt" ,r-dt)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-edger" ,r-edger)
       ("r-fnn" ,r-fnn)
       ("r-ggplot2" ,r-ggplot2)
       ("r-igraph" ,r-igraph)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-shiny" ,r-shiny)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description
     "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
10905
;; R package "DelayedMatrixStats", fetched from Bioconductor.  The upstream
;; name is mixed-case, so it is recorded in the package properties.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedMatrixStats" version))
       (sha256
        (base32
         "1cxjbjdq9hg9cm95rci0al7a4pk2h73ym276ahw9q4977zbg6381"))))
    (properties
     `((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    ;; The compound subject "both memory usage and processing time" takes
    ;; the plural verb "are".
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time are minimized.")
    (license license:expat)))
10937
;; R package "phangorn".  Unlike its neighbours in this file it is fetched
;; from CRAN rather than Bioconductor.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "phangorn" version))
              (sha256
               (base32
                "0xc8k552nxczy19jr0xjjagrzc8x6lafasgk2c099ls8bc1yml1i"))))
    (build-system r-build-system)
    ;; Every dependency is an R package and is propagated.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
10965
;; R package "dropbead", built from a pinned Git commit of the upstream
;; repository.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      ;; `git-version' builds "0-<revision>.<first 7 characters of commit>",
      ;; the same string the previous hand-written `string-append' produced.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      ;; Every dependency is an R package and is propagated.
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
10995
;; Private (non-exported) variant of htslib, built from a pinned commit of
;; the lomereiter fork.  It is used as the "htslib" input of sambamba below.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/lomereiter/htslib.git")
                      (commit commit)))
                (file-name (string-append "htslib-" version "-checkout"))
                (sha256
                 (base32
                  "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      ;; Add autoconf and automake in front of the native inputs inherited
      ;; from htslib.
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
11016
;; Sambamba, built from a Git tag of the upstream repository.  There is no
;; configure step; the Makefile is patched to use the packaged htslib and
;; lz4, and the BioD and undeaD sources are copied into the build tree.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.7-10-g223fa20")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lomereiter/sambamba.git")
             (commit (string-append "v" version))))
       ;; `git-file-name' yields "<name>-<version>-checkout", the same
       ;; string that was previously assembled by hand.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zb9hrxglxqh13ava9wwri30cvf85hjnbn8ccnr8l60a3k5avczn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Replace the env-based interpreter line and the shell lookup of
         ;; ldmd2 with the absolute file names found on PATH.
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             (substitute* "gen_ldc_version_info.py"
               (("/usr/bin/env.*") (which "python")))
             (substitute* "Makefile"
               (("\\$\\(shell which ldmd2\\)") (which "ldmd2")))
             #t))
         ;; Copy the separately fetched BioD and undeaD checkouts into the
         ;; source tree.
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             (copy-recursively (assoc-ref inputs "undead") "undeaD")
             #t))
         ;; Link against the htslib and lz4 inputs instead of building the
         ;; bundled static copies.
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" htslib-static lz4-static") ""))
             #t))
         ;; Install the single built executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (install-file "build/sambamba" bin)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python2-minimal)
       ("biod"
        ,(let ((commit "c778e4f2d8bacea7499283ce39f5577b232732c6"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1z90562hg47i63gx042wb3ak2vqjg5z7hwgn9bp2pdxfg3nxrw37")))))
       ("undead"
        ,(let ((commit "92803d25c88657e945511f0976a0c79d8da46e89"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/dlang/undeaD.git")
                   (commit commit)))
             (file-name (string-append "undead-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "0vq6n81vzqvgphjw54lz2isc1j8lcxwjdbrhqz1h5gwrvw9w5138")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    ;; GitHub Pages serves this site over HTTPS; prefer the secure scheme.
    (home-page "https://lomereiter.github.io/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
11105
;; Ritornello, built from a release tarball with the GNU build system.
;; There is no configure step, and the binary is installed by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/KlugerLab/"
                           "Ritornello/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "02nik86gq9ljjriv6pamwlmqnfky3ads1fpklx6mc3hx6k40pg38"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the includes of <sam.h> to <samtools/sam.h>.
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             (substitute* '("src/SamStream.h"
                            "src/BufferedGenomeReader.h")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         ;; Copy the built executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p bindir)
               (install-file "bin/Ritornello" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
11150
;; Trim Galore!, a script distributed as a zip archive.  Nothing is
;; configured or compiled: the script is unpacked, its references to
;; cutadapt, gzip and gunzip are replaced with absolute file names, and it
;; is installed into bin.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/trim_galore/trim_galore_v"
                           version ".zip"))
       (sha256
        (base32
         "0b9qdxi4521gsrjvbhgky8g7kry9b5nx3byzaxkgxz7p4k8bn1mn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         ;; The archive contains plain files.
         (replace 'unpack
           (lambda* (#:key source #:allow-other-keys)
             ;; `invoke' raises an error when unzip exits with a non-zero
             ;; status and returns #t otherwise, so the exit status does
             ;; not have to be tested by hand.
             (invoke "unzip" source)))
         (delete 'configure)
         (delete 'build)
         ;; Point the script at the cutadapt and gzip inputs by absolute
         ;; file name.
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "trim_galore"
               (("\\$path_to_cutadapt = 'cutadapt'")
                (string-append "$path_to_cutadapt = '"
                               (assoc-ref inputs "cutadapt")
                               "/bin/cutadapt'"))
               (("\\| gzip")
                (string-append "| "
                               (assoc-ref inputs "gzip")
                               "/bin/gzip"))
               (("\"gunzip")
                (string-append "\""
                               (assoc-ref inputs "gzip")
                               "/bin/gunzip")))
             #t))
         ;; Copy the script into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               (install-file "trim_galore" bin)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("cutadapt" ,cutadapt)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
11210
;; GESS, a Python 2 program.  Nothing is configured or compiled: the
;; sources are copied into a site-packages directory of the output, GESS.py
;; is patched and wrapped, and a symlink to it is created in bin.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           ;; The `inputs' key and a `python' binding derived from it were
           ;; never used (the interpreter is located with `which'), so both
           ;; have been dropped.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    (target (string-append
                             out "/lib/python2.7/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-program (string-append target "GESS.py")
                 `("PYTHONPATH" ":" prefix (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)))
    (home-page "http://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
11271
;; PHYLIP has no configure script.  The build runs the "install" target of
;; "Makefile.unx" from within "src"; the install phase then copies every
;; file found under "../exe" into the output's "bin" directory.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:parallel-build? #f             ; not supported
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The makefile is located in the "src" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "../exe" ".*"))
               #t))))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
11308
;; Integrative Modeling Platform, built with CMake against Python 2.
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     `(;; FIXME: Some tests fail because they produce warnings, others fail
       ;; because the PYTHONPATH does not include the modeller's directory.
       #:tests? #f))
    (native-inputs
     ;; SWIG is a code generator that is only run at build time, so it
     ;; belongs with the native inputs rather than the target inputs.
     `(("swig" ,swig)))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license. Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
11352
;; TADbit is built with the Python build system using Python 2; setup.py
;; is patched right after unpacking so that it installs cleanly.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/3DGenomes/TADbit/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1cnfqrl4685zar4nnw94j94nhvl2h29jm448nadqi1h05z6fdk4f"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completions (string-append (assoc-ref outputs "out")
                                               "/etc/bash_completion.d")))
               ;; setup.py opens these files for writing
               (for-each (lambda (file) (chmod file #o664))
                         '("_pytadbit/_version.py" "README.rst"))

               ;; Don't attempt to install the bash completions to
               ;; the home directory; give the file a visible name and
               ;; point setup.py at a directory inside the output.
               (rename-file "extras/.bash_completion" "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completions "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     (list (list "imp" imp)
           (list "mcl" mcl)
           (list "python2-scipy" python2-scipy)
           (list "python2-numpy" python2-numpy)
           (list "python2-matplotlib" python2-matplotlib)
           (list "python2-pysam" python2-pysam)))
    (home-page "http://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
11410
;; kentUtils: the origin snippet strips every "src" sub-directory that is
;; not on the allow-list and trims the makefiles accordingly.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ENCODE-DCC/kentUtils/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "134aja3k1cj32kbk1nnw0q9gxjb2krr15q6sga8qldzvc0585rmm"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest. "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let ((keep '("." ".."
                         "utils" "lib" "inc" "tagStorm"
                         "parasol" "htslib")))
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           ;; Plain files are left alone; only
                           ;; directories are removed.
                           (when (eq? 'directory (stat:type (stat path)))
                             (delete-file-recursively path))))
                       (scandir "src"
                                (lambda (entry)
                                  (not (member entry keep))))))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         ;; Place a copy of the samtabix sources inside the build tree.
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "samtabix")
                               "samtabix")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (copy-recursively "bin"
                               (string-append (assoc-ref outputs "out")
                                              "/bin"))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     (list (list "zlib" zlib)
           (list "tcsh" tcsh)
           (list "perl" perl)
           (list "libpng" libpng)
           (list "mariadb" mariadb)
           (list "openssl" openssl)))
    (home-page "http://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license. All other sources are removed in a
    ;; snippet. See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
11523
;; F-Seq is packaged from a pinned Git commit and built with Ant.  The
;; launcher script is patched to use absolute paths for "java" and for the
;; commons-cli jar taken from the inputs.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; 'git-version' yields "1.1-<revision>.<first 7 commit characters>".
      (version (git-version "1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                ;; Use the standard "-checkout" naming, like the other
                ;; Git-fetched origins.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((target (assoc-ref outputs "out"))
                      (doc (string-append target "/share/doc/f-seq/")))
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   ;; The dots are escaped so that only the literal jar
                   ;; path is matched.
                   (("\\$REALDIR/\\.\\./lib/commons-cli-1\\.1\\.jar")
                    (string-append (assoc-ref inputs "java-commons-cli")
                                   "/share/java/commons-cli.jar"))
                   (("REALDIR=.*")
                    (string-append "REALDIR=" target "/bin\n")))
                 ;; 'install-file' creates the destination directories,
                 ;; so no separate 'mkdir-p' calls are needed.
                 (install-file "README.txt" doc)
                 (install-file "bin/linux/fseq" (string-append target "/bin"))
                 (install-file "build~/fseq.jar" (string-append target "/lib"))
                 (copy-recursively "lib" (string-append target "/lib"))
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
11580
;; Bismark needs no configure or build step: the install phase copies the
;; listed scripts and documents, then patches the installed scripts.
(define-public bismark
  (package
    (name "bismark")
    (version "0.16.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/FelixKrueger/Bismark/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1204i0pa02ll2jn5pnxypkclnskvv7a2nwh5nxhagmhxk9wfv9sq"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (gunzip (string-append (assoc-ref inputs "gzip")
                                           "/bin/gunzip"))
                    (docs '("Bismark_User_Guide.pdf"
                            "RELEASE_NOTES.txt"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "bismark_sitrep.tpl"
                               "bam2nuc"
                               "bismark2summary"))
                    (installed (map (lambda (script)
                                      (string-append bin "/" script))
                                    scripts)))
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (script) (install-file script bin))
                         scripts)
               (for-each (lambda (document) (install-file document docdir))
                         docs)
               ;; Fix references to gunzip
               (substitute* installed
                 (("\"gunzip -c")
                  (string-append "\"" gunzip " -c")))
               #t))))))
    (inputs
     (list (list "gzip" gzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
11652
;; PAML is built with plain "make" from the "src" directory; the listed
;; programs and the documentation directory are installed by hand.
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script; patch the hard-coded shell path
         ;; and enter the source directory instead.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               ;; The working directory is "src", so the documentation is
               ;; one level up.
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11700
;; Kallisto is a plain CMake build against HDF5 and zlib with the test
;; phase disabled.
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.43.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pachterlab/kallisto/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "03j3iqhvq7ya3c91gidly3k3jvgm97vjq4scihrlxh315j696r11"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f))                    ; no "check" target
    (inputs
     (list (list "hdf5" hdf5)
           (list "zlib" zlib)))
    (home-page "http://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11729
;; libgff is a plain CMake build with no inputs and the test phase disabled.
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/Kingsford-Group/libgff/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0vc4nxyhlm6g9vvmx5l4lfs5pnvixsv1hiiy4kddf2y3p6jna8ls"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f))                    ; no tests included
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "http://www.boost.org/LICENSE_1_0.txt"))))
11752
;; libdivsufsort is fetched from the Git tag named after the version and
;; configured with the 64-bit variant of the library enabled.
(define-public libdivsufsort
  (package
    (name "libdivsufsort")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/y-256/libdivsufsort.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "0fgdz9fzihlvjjrxy01md1bv9vh12rkgkwbm90b1hj5xpbaqp7z2"))))
    (build-system cmake-build-system)
    (arguments
     '(#:configure-flags
       ;; Needed for rapmap and sailfish.
       (list "-DBUILD_DIVSUFSORT64=ON")
       #:tests? #f))                    ; there are no tests
    (home-page "https://github.com/y-256/libdivsufsort")
    (synopsis "Lightweight suffix-sorting library")
    (description "libdivsufsort is a software library that implements a
lightweight suffix array construction algorithm. This library provides a
simple and an efficient C API to construct a suffix array and a
Burrows-Wheeler transformed string from a given string over a constant-size
alphabet. The algorithm runs in O(n log n) worst-case time using only 5n+O(1)
bytes of memory space, where n is the length of the string.")
    (license license:expat)))
11781
;; Sailfish is built with CMake.  Several phases added after 'unpack'
;; rewrite the CMake files so that the build uses libraries from the inputs
;; and a pre-fetched RapMap instead of downloading or bundling them.
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "https://github.com/kingsfordgroup/"
                              "sailfish/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1inn60dxiwsz8g9w7kvfhjxj4bwfb0r12dyhpzzhfbig712dkmm0"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden. But the tests fail. It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p "/tmp/rapmap")
               ;; The "rapmap" input is handed to tar, i.e. it is expected
               ;; to be an archive.  'invoke' raises an error if tar
               ;; fails, instead of silently continuing with an empty
               ;; directory.
               (invoke "tar" "xf" rapmap
                       "-C" "/tmp/rapmap"
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
               (copy-recursively "/tmp/rapmap/include" include)
               #t)))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("src/SailfishIndexer.cpp"
                            "src/SailfishUtils.cpp"
                            "src/SailfishQuantify.cpp"
                            "src/FASTAParser.cpp"
                            "include/PCA.hpp"
                            "include/SailfishUtils.hpp"
                            "include/SailfishIndex.hpp"
                            "include/CollapsedEMOptimizer.hpp"
                            "src/CollapsedEMOptimizer.cpp")
               (("#include \"jellyfish/config.h\"") ""))
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (string-append (assoc-ref inputs "jellyfish")
                               "/include/jellyfish-" ,(package-version jellyfish)))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (string-append (assoc-ref inputs "jellyfish")
                               "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so")))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("find_package\\(Jellyfish.*") "")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libsparsehash") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message("))

             ;; Ensure that Eigen headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (getenv "CPLUS_INCLUDE_PATH")
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://www.cs.cmu.edu/~ckingsf/software/sailfish")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
11928
;; Private (non-exported) build of the COMBINE-lab staden-io_lib
;; repository, fetched from the "v<version>" tag.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments
     (list #:parallel-tests? #f))       ; not supported
    (native-inputs
     (list (list "perl" perl)))         ; for tests
    (inputs
     (list (list "zlib" zlib)))
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
11969
;; Private (non-exported) build of the COMBINE-lab spdlog repository,
;; fetched from the "v<version>" tag and built with CMake.
(define spdlog-for-salmon
  (package
    (name "spdlog")
    (version "0.14.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/spdlog.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13730429gwlabi432ilpnja3sfvy0nn2719vnhhmii34xcdyc57q"))))
    (build-system cmake-build-system)
    (home-page "https://github.com/COMBINE-lab/spdlog")
    (synopsis "Very fast C++ logging library")
    (description "Spdlog is a very fast header-only C++ logging library with
performance as its primary goal.")
    (license license:expat)))
11989
;; This is a modified variant of bwa for use with Salmon.  It installs a
;; library to avoid having to build this as part of Salmon.
(define bwa-for-salmon
  (package (inherit bwa)
    (name "bwa")
    (version "0.7.12.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/bwa.git")
                    (commit (string-append "v" version))))
              (file-name (string-append "bwa-for-salmon-" version "-checkout"))
              (sha256
               (base32
                "1z2qa64y0c5hky10510x137mnzlhz6k8qf27csw4w9j6qihq95gb"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Besides the program, its README and its manual page, install
         ;; the static library and every header file of the source tree.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (lib (string-append out "/lib"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1"))
                    (inc (string-append out "/include/bwa")))
               ;; 'install-file' creates its destination directory, so
               ;; "lib" needs no separate 'mkdir-p'.
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               (install-file "libbwa.a" lib)
               ;; Keep creating the include directory up front so that it
               ;; exists regardless of what 'find-files' returns.
               (mkdir-p inc)
               (for-each (lambda (file)
                           (install-file file inc))
                         (find-files "." "\\.h$")))
             #t))
         ;; no "configure" script
         (delete 'configure))))))
12030
12031 (define-public salmon
12032 (package
12033 (name "salmon")
12034 (version "0.9.1")
12035 (source (origin
12036 (method git-fetch)
12037 (uri (git-reference
12038 (url "https://github.com/COMBINE-lab/salmon.git")
12039 (commit (string-append "v" version))))
12040 (file-name (string-append name "-" version "-checkout"))
12041 (sha256
12042 (base32
12043 "1zi1ff4i7y2ykk0vdzysgwzzzv166vg2x77pj1mf4baclavxj87a"))
12044 (modules '((guix build utils)))
12045 (snippet
12046 '(begin
12047 ;; Delete bundled headers for eigen3.
12048 (delete-file-recursively "include/eigen3/")
12049 #t))))
12050 (build-system cmake-build-system)
12051 (arguments
12052 `(#:configure-flags
12053 (list (string-append "-DBOOST_INCLUDEDIR="
12054 (assoc-ref %build-inputs "boost")
12055 "/include/")
12056 (string-append "-DBOOST_LIBRARYDIR="
12057 (assoc-ref %build-inputs "boost")
12058 "/lib/")
12059 (string-append "-DBoost_LIBRARIES="
12060 "-lboost_iostreams "
12061 "-lboost_filesystem "
12062 "-lboost_system "
12063 "-lboost_thread "
12064 "-lboost_timer "
12065 "-lboost_chrono "
12066 "-lboost_program_options")
12067 "-DBoost_FOUND=TRUE"
12068 "-DTBB_LIBRARIES=tbb tbbmalloc"
12069 ;; Don't download RapMap---we already have it!
12070 "-DFETCHED_RAPMAP=1")
12071 #:phases
12072 (modify-phases %standard-phases
12073 ;; Boost cannot be found, even though it's right there.
12074 (add-after 'unpack 'do-not-look-for-boost
12075 (lambda* (#:key inputs #:allow-other-keys)
12076 (substitute* "CMakeLists.txt"
12077 (("find_package\\(Boost 1\\.53\\.0") "#"))))
12078 (add-after 'unpack 'do-not-phone-home
12079 (lambda _
12080 (substitute* "src/Salmon.cpp"
12081 (("getVersionMessage\\(\\)") "\"\""))))
12082 (add-after 'unpack 'prepare-rapmap
12083 (lambda* (#:key inputs #:allow-other-keys)
12084 (let ((src "external/install/src/rapmap/")
12085 (include "external/install/include/rapmap/")
12086 (rapmap (assoc-ref inputs "rapmap")))
12087 (mkdir-p src)
12088 (mkdir-p include)
12089 (for-each (lambda (file)
12090 (install-file file src))
12091 (find-files (string-append rapmap "/src") "\\.(c|cpp)"))
12092 (copy-recursively (string-append rapmap "/include") include)
12093 (for-each delete-file '("external/install/include/rapmap/xxhash.h"
12094 "external/install/include/rapmap/FastxParser.hpp"
12095 "external/install/include/rapmap/concurrentqueue.h"
12096 "external/install/include/rapmap/FastxParserThreadUtils.hpp"
12097 "external/install/src/rapmap/FastxParser.cpp"
12098 "external/install/src/rapmap/xxhash.c")))))
12099 (add-after 'unpack 'use-system-libraries
12100 (lambda* (#:key inputs #:allow-other-keys)
12101 (substitute* "src/CMakeLists.txt"
12102 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
12103 (string-append (assoc-ref inputs "jellyfish")
12104 "/include/jellyfish-" ,(package-version jellyfish)))
12105 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
12106 (string-append (assoc-ref inputs "jellyfish")
12107 "/lib/libjellyfish-2.0.a"))
12108 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
12109 (string-append (assoc-ref inputs "libdivsufsort")
12110 "/lib/libdivsufsort.so"))
12111 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
12112 (string-append (assoc-ref inputs "libstadenio-for-salmon")
12113 "/lib/libstaden-read.a"))
12114 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libbwa.a")
12115 (string-append (assoc-ref inputs "bwa") "/lib/libbwa.a"))
12116 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
12117 (string-append (assoc-ref inputs "libdivsufsort")
12118 "/lib/libdivsufsort64.so")))
12119 (substitute* "CMakeLists.txt"
12120 ;; Don't prefer static libs
12121 (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
12122 (("set\\(TBB_LIBRARIES") "message(")
12123 (("find_package\\(Jellyfish.*") "")
12124 (("ExternalProject_Add\\(libcereal") "message(")
12125 (("ExternalProject_Add\\(libbwa") "message(")
12126 (("ExternalProject_Add\\(libjellyfish") "message(")
12127 (("ExternalProject_Add\\(libgff") "message(")
12128 (("ExternalProject_Add\\(libtbb") "message(")
12129 (("ExternalProject_Add\\(libspdlog") "message(")
12130 (("ExternalProject_Add\\(libdivsufsort") "message(")
12131 (("ExternalProject_Add\\(libstadenio") "message(")
12132 (("ExternalProject_Add_Step\\(") "message("))
12133
12134 ;; Ensure that all headers can be found
12135 (setenv "CPLUS_INCLUDE_PATH"
12136 (string-append (getenv "CPLUS_INCLUDE_PATH")
12137 ":"
12138 (assoc-ref inputs "bwa")
12139 "/include/bwa"
12140 ":"
12141 (assoc-ref inputs "eigen")
12142 "/include/eigen3"))
12143 (setenv "CPATH"
12144 (string-append (assoc-ref inputs "bwa")
12145 "/include/bwa"
12146 ":"
12147 (assoc-ref inputs "eigen")
12148 "/include/eigen3"))
12149 #t))
12150 ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
12151 ;; run. It only exists after the install phase.
12152 (add-after 'unpack 'fix-tests
12153 (lambda _
12154 (substitute* "src/CMakeLists.txt"
12155 (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
12156 "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
12157 #t)))))
12158 (inputs
12159 `(("boost" ,boost)
12160 ("bwa" ,bwa-for-salmon)
12161 ("bzip2" ,bzip2)
12162 ("cereal" ,cereal)
12163 ("eigen" ,eigen)
12164 ("rapmap" ,(origin
12165 (method git-fetch)
12166 (uri (git-reference
12167 (url "https://github.com/COMBINE-lab/RapMap.git")
12168 (commit (string-append "salmon-v" version))))
12169 (file-name (string-append "rapmap-salmon-v" version "-checkout"))
12170 (sha256
12171 (base32
12172 "1yc12yqsz6f0r8sg1qnk57xg34aqwc9jbqq6gd5ys28xw3plj98p"))))
12173 ("jemalloc" ,jemalloc)
12174 ("jellyfish" ,jellyfish)
12175 ("libgff" ,libgff)
12176 ("tbb" ,tbb)
12177 ("libdivsufsort" ,libdivsufsort)
12178 ("libstadenio-for-salmon" ,libstadenio-for-salmon)
12179 ("spdlog-for-salmon" ,spdlog-for-salmon)
12180 ("xz" ,xz)
12181 ("zlib" ,zlib)))
12182 (home-page "https://github.com/COMBINE-lab/salmon")
12183 (synopsis "Quantification from RNA-seq reads using lightweight alignments")
12184 (description "Salmon is a program to produce highly-accurate,
12185 transcript-level quantification estimates from RNA-seq data. Salmon achieves
12186 its accuracy and speed via a number of different innovations, including the
12187 use of lightweight alignments (accurate but fast-to-compute proxies for
12188 traditional read alignments) and massively-parallel stochastic collapsed
12189 variational inference.")
12190 (license license:gpl3+)))
12191
;; Package definition for loompy, a Python library for .loom files.
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.2")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "loompy" version))
              (sha256
               (base32
                "1drgv8j1hxqzzpnfg272x9djb6j8qr798w1pc2x8ikmfgyd9gh51"))))
    (build-system python-build-system)
    ;; The check phase is disabled: there are no tests to run.
    (arguments (list #:tests? #f))
    ;; Propagated so that they are available alongside loompy at run time.
    (propagated-inputs
     (list (list "python-h5py" python-h5py)
           (list "python-numpy" python-numpy)
           (list "python-scipy" python-scipy)
           (list "python-typing" python-typing)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
12219
;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  (let* ((revision "1")
         (commit "86b07efe302d5094b42deed9260f719a4c4ac2e6"))
    (package
      (name "java-forester")
      (version (string-append "0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled jars and pre-built classes
             (for-each delete-file-recursively
                       '("forester/java/resources"
                         "forester/java/classes"))
             (for-each delete-file
                       (find-files "forester/java/" "\\.jar$"))
             ;; Delete bundled applications
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:jdk ,icedtea-8
         #:tests? #f                    ; there are none
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; The Java sources and build.xml live in a sub-directory.
           (add-after 'unpack 'chdir
             (lambda _
               (chdir "forester/java")
               #t))
           ;; Rewrite build.xml through SXML, dropping every <unjar>
           ;; element and keeping all other nodes unchanged.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (call-with-output-file "build.xml.new"
                 (lambda (out-port)
                   (let ((rules
                          (list
                           ;; Remove all unjar tags to avoid repacking
                           ;; classes.
                           (cons 'unjar (lambda _ '()))
                           (cons '*default*
                                 (lambda (tag . kids) (cons tag kids)))
                           (cons '*text* (lambda (_ txt) txt)))))
                     (sxml->xml
                      (pre-post-order
                       (with-input-from-file "build.xml"
                         (lambda _ (xml->sxml #:trim-whitespace? #t)))
                       rules)
                      out-port))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file
                "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       (list (list "java-commons-codec" java-commons-codec)
             (list "java-openchart2" java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
12295
;; Forester 1.005, built from the sources jar published on Maven Central.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://search.maven.org/remotecontent?"
                           "filepath=org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32
         "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:tests? #f                      ; there are none
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml without any
         ;; <unjar> elements, then point it at the resource files that are
         ;; supplied as separate inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (call-with-output-file "build.xml"
               (lambda (out-port)
                 (let ((rules
                        (list
                         ;; Remove all unjar tags to avoid repacking
                         ;; classes.
                         (cons 'unjar (lambda _ '()))
                         (cons '*default*
                               (lambda (tag . kids) (cons tag kids)))
                         (cons '*text* (lambda (_ txt) txt)))))
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "src/build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     rules)
                    out-port))))
             ;; Each resource input is copied into the build directory
             ;; under the same name as its input label.
             (for-each (lambda (resource)
                         (copy-file (assoc-ref inputs resource) resource))
                       '("synth_look_and_feel_1.xml"
                         "phyloxml.xsd"))
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; The first replacement opens a Java block comment; the
             ;; other two close it again.
             (substitute*
                 (list "src/org/forester/archaeopteryx/MainFrame.java"
                       "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);")
                "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);")
                "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _
             (delete-file-recursively "src/classes")
             #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    ;; The source archive does not contain the resources.
    (native-inputs
     (list
      (list "phyloxml.xsd"
            (origin
              (method url-fetch)
              (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                                  "b61cc2dcede0bede317db362472333115756b8c6/"
                                  "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
              (file-name (string-append name "-phyloxml-" version ".xsd"))
              (sha256
               (base32
                "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
      (list "synth_look_and_feel_1.xml"
            (origin
              (method url-fetch)
              (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                                  "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                                  "forester/java/classes/resources/synth_look_and_feel_1.xml"))
              (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
              (sha256
               (base32
                "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (propagated-inputs
     (list (list "java-commons-codec" java-commons-codec)
           (list "java-openchart2" java-openchart2)))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
12390
;; Core module of BioJava, built from the "biojava-core" sub-directory of
;; the upstream checkout.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude '("**/SearchIOTest.java"
                        "**/BlastXMLParserTest.java"
                        "**/GenbankCookbookTest.java"
                        "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Put the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (let ((from "biojava-core/src/main/resources")
                   (to "build/classes"))
               (copy-recursively from to))
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (let ((from "biojava-core/src/test/resources")
                   (to "build/test-classes"))
               (copy-recursively from to))
             #t)))))
    (native-inputs
     (list (list "java-junit" java-junit)
           (list "java-hamcrest-core" java-hamcrest-core)))
    (propagated-inputs
     (list (list "java-log4j-api" java-log4j-api)
           (list "java-log4j-core" java-log4j-core)
           (list "java-slf4j-api" java-slf4j-api)
           (list "java-slf4j-simple" java-slf4j-simple)))
    (home-page "http://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
12446
;; Phylo module of BioJava; source, version and license are inherited from
;; java-biojava-core.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Put the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (let ((from "biojava-phylo/src/main/resources")
                   (to "build/classes"))
               (copy-recursively from to))
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (let ((from "biojava-phylo/src/test/resources")
                   (to "build/test-classes"))
               (copy-recursively from to))
             #t)))))
    (native-inputs
     (list (list "java-junit" java-junit)
           (list "java-hamcrest-core" java-hamcrest-core)))
    (propagated-inputs
     (list (list "java-log4j-api" java-log4j-api)
           (list "java-log4j-core" java-log4j-core)
           (list "java-slf4j-api" java-slf4j-api)
           (list "java-slf4j-simple" java-slf4j-simple)
           (list "java-biojava-core" java-biojava-core)
           (list "java-forester" java-forester)))
    (home-page "http://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12482
;; Alignment module of BioJava; source, version and license are inherited
;; from java-biojava-core.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Put the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (let ((from "biojava-alignment/src/main/resources")
                   (to "build/classes"))
               (copy-recursively from to))
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (let ((from "biojava-alignment/src/test/resources")
                   (to "build/test-classes"))
               (copy-recursively from to))
             #t)))))
    (native-inputs
     (list (list "java-junit" java-junit)
           (list "java-hamcrest-core" java-hamcrest-core)))
    (propagated-inputs
     (list (list "java-log4j-api" java-log4j-api)
           (list "java-log4j-core" java-log4j-core)
           (list "java-slf4j-api" java-slf4j-api)
           (list "java-slf4j-simple" java-slf4j-simple)
           (list "java-biojava-core" java-biojava-core)
           (list "java-biojava-phylo" java-biojava-phylo)
           (list "java-forester" java-forester)))
    (home-page "http://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12526
;; Version 4.0.0 of the BioJava core module.  Only the version and the
;; source checkout differ from java-biojava-core; every other field is
;; inherited.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
12540
;; Phylo module built from the 4.0 sources; it inherits from
;; java-biojava-core-4.0 and uses forester 1.005.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Put the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (let ((from "biojava-phylo/src/main/resources")
                   (to "build/classes"))
               (copy-recursively from to))
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (let ((from "biojava-phylo/src/test/resources")
                   (to "build/test-classes"))
               (copy-recursively from to))
             #t)))))
    (native-inputs
     (list (list "java-junit" java-junit)
           (list "java-hamcrest-core" java-hamcrest-core)))
    (propagated-inputs
     (list (list "java-log4j-api" java-log4j-api)
           (list "java-log4j-core" java-log4j-core)
           (list "java-slf4j-api" java-slf4j-api)
           (list "java-slf4j-simple" java-slf4j-simple)
           (list "java-biojava-core" java-biojava-core-4.0)
           (list "java-forester" java-forester-1.005)))
    (home-page "http://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12576
;; Alignment module built from the 4.0 sources; it inherits from
;; java-biojava-core-4.0 and uses the matching 4.0 phylo module and
;; forester 1.005.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Put the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (let ((from "biojava-alignment/src/main/resources")
                   (to "build/classes"))
               (copy-recursively from to))
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (let ((from "biojava-alignment/src/test/resources")
                   (to "build/test-classes"))
               (copy-recursively from to))
             #t)))))
    (native-inputs
     (list (list "java-junit" java-junit)
           (list "java-hamcrest-core" java-hamcrest-core)))
    (propagated-inputs
     (list (list "java-log4j-api" java-log4j-api)
           (list "java-log4j-core" java-log4j-core)
           (list "java-slf4j-api" java-slf4j-api)
           (list "java-slf4j-simple" java-slf4j-simple)
           (list "java-biojava-core" java-biojava-core-4.0)
           (list "java-biojava-phylo" java-biojava-phylo-4.0)
           (list "java-forester" java-forester-1.005)))
    (home-page "http://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12620
;; Drop-seq tools: an Ant-built jar plus launcher scripts.  Bundled jars are
;; removed from the source and replaced by symlinks to the Java inputs.
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Keep the directories of all inputs whose label starts with
             ;; "java-", except for the test-only "java-testng".
             (let ((dirs (filter-map (match-lambda
                                       ((name . dir)
                                        (and (string-prefix? "java-" name)
                                             (not (string=? name "java-testng"))
                                             dir)))
                                     inputs)))
               (for-each (lambda (jar)
                           (symlink jar (string-append "jar/lib/" (basename jar))))
                         (append-map (lambda (dir) (find-files dir "\\.jar$"))
                                     dirs)))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin     (string-append out "/bin"))
                    (share   (string-append out "/share/java/"))
                    ;; SHARE already ends with a slash, so do not add
                    ;; another one in front of "lib/".
                    (lib     (string-append share "lib/"))
                    (scripts (list "BAMTagHistogram"
                                   "BAMTagofTagCounts"
                                   "BaseDistributionAtReadPosition"
                                   "CollapseBarcodesInPlace"
                                   "CollapseTagWithContext"
                                   "ConvertToRefFlat"
                                   "CreateIntervalsFiles"
                                   "DetectBeadSynthesisErrors"
                                   "DigitalExpression"
                                   "Drop-seq_alignment.sh"
                                   "FilterBAM"
                                   "FilterBAMByTag"
                                   "GatherGeneGCLength"
                                   "GatherMolecularBarcodeDistributionByGene"
                                   "GatherReadQualityMetrics"
                                   "PolyATrimmer"
                                   "ReduceGTF"
                                   "SelectCellsByNumTranscripts"
                                   "SingleCellRnaSeqMetricsCollector"
                                   "TagBamWithReadSequenceExtended"
                                   "TagReadWithGeneExon"
                                   "TagReadWithInterval"
                                   "TrimStartingSequence"
                                   "ValidateReference")))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; Make the installed launchers call "java" by absolute
               ;; file name and look for the jar in SHARE.
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    ;; Same directory as LIB in the 'install phase.
                    (lib   (string-append share "lib/")))
               ;; The entries in "jar/lib" are symlinks created by
               ;; 'record-references; re-create each one in LIB with the
               ;; same target.
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib (basename jar))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12758
;; PiGx RNA-seq pipeline, built from the upstream release tarball.
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "0pz080k4ajlc4rlznkn3najy2a6874gb56rf9g4ag9wqz31q174j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               ;; Keep only the captured prefix of the trim_galore line,
               ;; and blank out the "test.sh" entry.
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ test.sh") ""))
             #t))
         (add-after 'install 'wrap-executable
           ;; Make sure the executable finds all R modules.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix   (assoc-ref outputs "out"))
                    (launcher (string-append prefix "/bin/pigx-rnaseq")))
               ;; Pin both search paths to their build-time values.
               (wrap-program launcher
                 (list "R_LIBS_SITE" ":" '= (list (getenv "R_LIBS_SITE")))
                 (list "PYTHONPATH" ":" '= (list (getenv "PYTHONPATH")))))
             #t)))))
    (inputs
     `(("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
12833
;; PiGx ChIP-seq pipeline, built from the upstream release tarball.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.15")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32
         "11v9v3vyda0sv4cl45nki7mm4v4bjfcdq7a70kcvi9h465nq66wg"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f      ; parts of the tests rely on access to the network
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executable
           ;; Make sure the executable finds all R modules.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix   (assoc-ref outputs "out"))
                    (launcher (string-append prefix "/bin/pigx-chipseq")))
               ;; Pin both search paths to their build-time values.
               (wrap-program launcher
                 (list "R_LIBS_SITE" ":" '= (list (getenv "R_LIBS_SITE")))
                 (list "PYTHONPATH" ":" '= (list (getenv "PYTHONPATH")))))
             #t)))))
    (native-inputs
     (list (list "python-pytest" python-pytest)))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
12913
;; PiGx bisulfite sequencing pipeline, built from the upstream release
;; tarball.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                           "releases/download/v" version
                           "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32
         "0irlnlhhw9fd4ha7hksrxn3y7j76mz5qq1wjswbs9p364laqg69y"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-timezone
           ;; The readr package is picky about timezones.
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo))
             #t))
         (add-after 'install 'wrap-executable
           ;; Make sure the executable finds all R modules.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix   (assoc-ref outputs "out"))
                    (launcher (string-append prefix "/bin/pigx-bsseq")))
               ;; Pin both search paths to their build-time values.
               (wrap-program launcher
                 (list "R_LIBS_SITE" ":" '= (list (getenv "R_LIBS_SITE")))
                 (list "PYTHONPATH" ":" '= (list (getenv "PYTHONPATH")))))
             #t)))))
    ;; tzdata provides the zoneinfo directory used by 'set-timezone.
    (native-inputs
     (list (list "tzdata" tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
12980
;; Package definition for the PiGx scRNAseq pipeline, built with the GNU
;; build system from the upstream release tarball.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "0.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                                  "releases/download/v" version
                                  "/pigx_scrnaseq-" version ".tar.gz"))
              (sha256
               (base32
                "12qdq0nj1wdkyighdxj6924bmbpd1a0b3gam6w64l4hiqrv5sijz"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Hand the configure script the locations of the jar files provided
       ;; by the "java-picard" and "dropseq-tools" inputs.
       (list (string-append "PICARDJAR=" (assoc-ref %build-inputs "java-picard")
                            "/share/java/picard.jar")
             (string-append "DROPSEQJAR=" (assoc-ref %build-inputs "dropseq-tools")
                            "/share/java/dropseq.jar"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executable
           ;; Make sure the executable finds all R modules: the wrapper sets
           ;; R_LIBS_SITE and PYTHONPATH to the values present in the build
           ;; environment.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (wrap-program (string-append out "/bin/pigx-scrnaseq")
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))
                 `("PYTHONPATH" ":" = (,(getenv "PYTHONPATH")))))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("dropseq-tools" ,dropseq-tools)
       ("fastqc" ,fastqc)
       ("java-picard" ,java-picard)
       ("java" ,icedtea-8)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("snakemake" ,snakemake)
       ("star" ,star)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    ;; The project name is spelled "PiGx", matching the pigx-bsseq and pigx
    ;; descriptions.
    (description "PiGx scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
13059
;; Umbrella package for PiGx: its inputs are Python plus the four individual
;; pipeline packages.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       ;; The release tarball lives under the "v<version>" tag directory.
       (uri (let ((tarball (string-append "pigx-" version ".tar.gz")))
              (string-append
               "https://github.com/BIMSBbioinfo/pigx/releases/download/v"
               version "/" tarball)))
       (sha256
        (base32 "0sb708sl42h3s5z872jb1w70bbqplwapnsc1wm27zcsvi7li4gw8"))))
    (build-system gnu-build-system)
    (inputs
     ;; Each entry is a (LABEL PACKAGE) pair.
     (list (list "python" python)
           (list "pigx-bsseq" pigx-bsseq)
           (list "pigx-chipseq" pigx-chipseq)
           (list "pigx-rnaseq" pigx-rnaseq)
           (list "pigx-scrnaseq" pigx-scrnaseq)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
13095
;; Package definition for the R package "diversitree", fetched via `cran-uri'
;; and built with the R build system.
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-10")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "0gh4rcrp0an3jh8915i1fsxlgyfk7njywgbd5ln5r2jhr085kpz7"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs `(("fftw" ,fftw) ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ;; The label matches the package variable name (it previously read
       ;; "r-suplex").
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\".  Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))