gnu: r-seqminer: Update to 6.1.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;;
16 ;;; This file is part of GNU Guix.
17 ;;;
18 ;;; GNU Guix is free software; you can redistribute it and/or modify it
19 ;;; under the terms of the GNU General Public License as published by
20 ;;; the Free Software Foundation; either version 3 of the License, or (at
21 ;;; your option) any later version.
22 ;;;
23 ;;; GNU Guix is distributed in the hope that it will be useful, but
24 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;;; GNU General Public License for more details.
27 ;;;
28 ;;; You should have received a copy of the GNU General Public License
29 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
30
31 (define-module (gnu packages bioinformatics)
32 #:use-module ((guix licenses) #:prefix license:)
33 #:use-module (guix packages)
34 #:use-module (guix utils)
35 #:use-module (guix download)
36 #:use-module (guix git-download)
37 #:use-module (guix hg-download)
38 #:use-module (guix build-system ant)
39 #:use-module (guix build-system gnu)
40 #:use-module (guix build-system cmake)
41 #:use-module (guix build-system ocaml)
42 #:use-module (guix build-system perl)
43 #:use-module (guix build-system python)
44 #:use-module (guix build-system r)
45 #:use-module (guix build-system ruby)
46 #:use-module (guix build-system scons)
47 #:use-module (guix build-system trivial)
48 #:use-module (gnu packages)
49 #:use-module (gnu packages autotools)
50 #:use-module (gnu packages algebra)
51 #:use-module (gnu packages base)
52 #:use-module (gnu packages bash)
53 #:use-module (gnu packages bison)
54 #:use-module (gnu packages boost)
55 #:use-module (gnu packages check)
56 #:use-module (gnu packages compression)
57 #:use-module (gnu packages cpio)
58 #:use-module (gnu packages cran)
59 #:use-module (gnu packages curl)
60 #:use-module (gnu packages documentation)
61 #:use-module (gnu packages databases)
62 #:use-module (gnu packages datastructures)
63 #:use-module (gnu packages file)
64 #:use-module (gnu packages flex)
65 #:use-module (gnu packages gawk)
66 #:use-module (gnu packages gcc)
67 #:use-module (gnu packages gd)
68 #:use-module (gnu packages gtk)
69 #:use-module (gnu packages glib)
70 #:use-module (gnu packages graph)
71 #:use-module (gnu packages groff)
72 #:use-module (gnu packages guile)
73 #:use-module (gnu packages haskell)
74 #:use-module (gnu packages image)
75 #:use-module (gnu packages imagemagick)
76 #:use-module (gnu packages java)
77 #:use-module (gnu packages jemalloc)
78 #:use-module (gnu packages ldc)
79 #:use-module (gnu packages linux)
80 #:use-module (gnu packages logging)
81 #:use-module (gnu packages machine-learning)
82 #:use-module (gnu packages man)
83 #:use-module (gnu packages maths)
84 #:use-module (gnu packages mpi)
85 #:use-module (gnu packages ncurses)
86 #:use-module (gnu packages ocaml)
87 #:use-module (gnu packages pcre)
88 #:use-module (gnu packages parallel)
89 #:use-module (gnu packages pdf)
90 #:use-module (gnu packages perl)
91 #:use-module (gnu packages perl-check)
92 #:use-module (gnu packages pkg-config)
93 #:use-module (gnu packages popt)
94 #:use-module (gnu packages protobuf)
95 #:use-module (gnu packages python)
96 #:use-module (gnu packages python-web)
97 #:use-module (gnu packages readline)
98 #:use-module (gnu packages ruby)
99 #:use-module (gnu packages serialization)
100 #:use-module (gnu packages shells)
101 #:use-module (gnu packages statistics)
102 #:use-module (gnu packages swig)
103 #:use-module (gnu packages tbb)
104 #:use-module (gnu packages tex)
105 #:use-module (gnu packages texinfo)
106 #:use-module (gnu packages textutils)
107 #:use-module (gnu packages time)
108 #:use-module (gnu packages tls)
109 #:use-module (gnu packages vim)
110 #:use-module (gnu packages web)
111 #:use-module (gnu packages xml)
112 #:use-module (gnu packages xorg)
113 #:use-module (srfi srfi-1)
114 #:use-module (ice-9 match))
115
;; ARAGORN is built by compiling a single C file directly with gcc, so the
;; standard 'configure' phase is removed and 'build' and 'install' are
;; replaced with hand-written phases.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; 'invoke' raises an error when the compiler exits with a
             ;; non-zero status and returns #t otherwise, so the phase
             ;; result never has to be derived from an exit code by hand.
             (invoke "gcc"
                     "-O3"
                     "-ffast-math"
                     "-finline-functions"
                     "-o"
                     "aragorn"
                     (string-append "aragorn" ,version ".c"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; 'install-file' creates the target directory when needed,
               ;; so no explicit 'mkdir-p' is required.
               (install-file "aragorn" bin)
               (install-file "aragorn.1" man))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
161
;; BamM: a Python 2 package wrapping a C library.  The C part lives in the
;; "c" sub-directory and needs its autotools files generated before the
;; Python build system takes over.  Tests are run after installation so that
;; the C code is compiled only once.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method url-fetch)
              ;; BamM is not available on pypi.
              (uri (string-append
                    "https://github.com/Ecogenomics/BamM/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts. Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (substitute* "configure" (("/bin/sh") sh))
                 ;; 'invoke' raises an error on a non-zero exit status and
                 ;; returns #t on success.
                 (invoke "./autogen.sh")))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (python (assoc-ref inputs "python"))
                    ;; Take the text after the last dash of the Python store
                    ;; file name as its version, and keep only "MAJOR.MINOR".
                    ;; Slicing a fixed number of characters off the end of
                    ;; the file name breaks as soon as the patch level has
                    ;; two digits (e.g. "2.7.15" would yield ".7.").
                    (version (substring python
                                        (+ 1 (string-rindex python #\-))))
                    (major+minor (string-join
                                  (list-head (string-split version #\.) 2)
                                  ".")))
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               (setenv "PYTHONPATH"
                       (string-append out "/lib/python" major+minor
                                      "/site-packages:"
                                      (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (invoke "nosetests")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)    ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
253
;; BamTools, built with CMake.  An extra phase adds the package's own
;; "lib/bamtools" directory to the linker's run-time search path.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1z3kg24qrwq13a88n9d86izngrar4fll7gr6phddb2faw75pdgaa"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before
             'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' has no meaningful return value; report success
             ;; explicitly like the other phases in this file.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
286
;; BCFtools, built against the htslib package rather than the copy bundled
;; in the release tarball (which the source snippet deletes).
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/bcftools/releases/download/"
                    version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "1vgw2mwngq20c530zim52zvgmw1lci8rzl33pvh44xqk3xlzvjsa"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.8")
                          #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags (list "--with-htslib=system")
       #:make-flags
       (list
        "USE_GPL=1"
        "LIBS=-lgsl -lgslcblas"
        (string-append "prefix=" (assoc-ref %outputs "out"))
        (string-append "HTSDIR=" (assoc-ref %build-inputs "htslib") "/include")
        (string-append "HTSLIB=" (assoc-ref %build-inputs "htslib") "/lib/libhts.so")
        (string-append "BGZIP=" (assoc-ref %build-inputs "htslib") "/bin/bgzip")
        (string-append "TABIX=" (assoc-ref %build-inputs "htslib") "/bin/tabix")
        (string-append "PACKAGE_VERSION=" ,version))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; The test driver hard-codes /bin/bash; point it at the bash
             ;; found in PATH instead.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     ;; htslib is linked into the resulting programs (see HTSLIB above), so
     ;; it is a regular input rather than a build-time-only native input.
     `(("htslib" ,htslib)
       ("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
339
;; BEDOPS, built with its in-tree copies of jansson, zlib and bzip2 (see the
;; FIXME in the 'unpack-tarballs' phase for the reason).
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.33")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0kx4awrwby8f33wqyx8w7ms7v25xhf0d421csgf96a3hfzn2mb0m"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2. We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.
             ;; 'invoke' raises an error if tar fails, so a broken or
             ;; missing tarball aborts the build here instead of being
             ;; silently ignored.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         (list "jansson-2.6.tar.bz2"
                               "zlib-1.2.7.tar.bz2"
                               "bzip2-1.0.6.tar.bz2")))
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
398
;; bedtools: the standard 'configure' phase is dropped and the installation
;; prefix is handed to make directly.  The test suite is run through the
;; Makefile's "test" target.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.27.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "1ndg5yknrxl4djx8ddzgk12rrbiidfpmkkg5z3f95jzryfxarhn8"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
433
434 ;; Later releases of bedtools produce files with more columns than
435 ;; what Ribotaper expects.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install by hand: copy everything found under the build tree's
         ;; "bin" directory into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (destination (string-append output "/bin/")))
               (for-each (lambda (program)
                           (install-file program destination))
                         (find-files "bin" ".*")))
             #t)))
       #:test-target "test"))))
460
;; RiboTaper: after installation the listed shell scripts are wrapped so
;; that R_LIBS_SITE is set to the value it had in the build environment.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                                  "files/RiboTaper/RiboTaper_Version_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             ;; 'for-each' has no meaningful return value; report success
             ;; explicitly like the other phases in this file.
             #t)))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
504
;; RiboDiff (Python 2).  setup.py is patched after unpacking so that it also
;; declares scripts/TE.py as a script.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ratschlab/RiboDiff/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper: right after
         ;; the "packages=" line of setup.py, add a "scripts=" line with the
         ;; same leading text.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             (substitute* "setup.py"
               (("^(.*)packages=.*" matched leading)
                (string-append matched "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
548
;; Bioawk: there is no configure step and installation is done by hand, with
;; the awk manual page installed under the name "bioawk.1".
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; 'copy-file' does not create the destination directory, so
               ;; it has to exist before the manual page is copied.
               (mkdir-p man)
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin))
             ;; Report success explicitly like the other phases in this
             ;; file; 'install-file' has no meaningful return value.
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
589
;; pybedtools, Python 2 only.  bedtools and samtools are propagated so that
;; they are installed alongside the library.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Use the 'pypi-uri' helper, like the other PyPI-hosted
              ;; packages in this file, instead of a hand-written URL.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
621
;; BIOM format utilities for Python 3.  The Python 2 variant is defined
;; separately and linked through the 'python2-variant' property.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.6")
    (source
     (origin
       (method url-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "08cr7wpahk6zb31h4bs7jmzpvxcqv9s13xz40h6y2h656jvdvnpj"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (properties `((python2-variant . ,(delay python2-biom-format))))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)))
656
;; Python 2 variant of python-biom-format.  It adds one phase that rewrites
;; the line of setup.py appending "pyqi" to the install requirements, then
;; splices in the arguments of the automatically derived Python 2 package.
(define-public python2-biom-format
  (let ((python2-base
         (package-with-python2
          (strip-python2-variant python-biom-format))))
    (package
      (inherit python2-base)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ,@(package-arguments python2-base))))))
671
;; BioPerl with a minimal set of inputs.  The input list is bound outside the
;; package so that the names of all packages it transitively pulls in can be
;; computed once and embedded in the 'wrap-programs' phase.
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         ;; Package names of the de-duplicated transitive target inputs of
         ;; every package listed above.
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs cadr)
                            perl-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; The release tag spells the version with dashes instead of dots.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (c)
                                           (if (char=? c #\.)
                                               #\- c)) version)
                             ".tar.gz"))
         (sha256
          (base32 "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after
               'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime. As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((output (assoc-ref outputs "out"))
                      (scripts-dir (string-append output "/bin/"))
                      (input-dirs (map (lambda (label)
                                         (assoc-ref %build-inputs label))
                                       ',transitive-input-names))
                      (perl5lib (string-join
                                 (cons (string-append output
                                                      "/lib/perl5/site_perl")
                                       input-dirs)
                                 ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files scripts-dir "\\.pl$"))
                 #t))))))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
736
;; Biopython, fetched from PyPI.  HOME is pointed at /tmp before the tests
;; run.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
767
;; Python 2 variant, derived from python-biopython.
(define-public python2-biopython
  (package-with-python2
   python-biopython))
770
;; fastalite, fetched from PyPI.  Tests are disabled because the test data
;; is not part of the distribution.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "fastalite" version))
       (sha256
        (base32 "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    (arguments
     ;; Test data is not distributed.
     `(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
790
;; Python 2 variant, derived from python-fastalite.
(define-public python2-fastalite
  (package-with-python2
   python-fastalite))
793
(define-public bpp-core
  ;; Built from a pinned commit of the upstream git repository: the last
  ;; release was in 2014 and cloning the repository is the recommended way
  ;; to install from source.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((upstream-commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take upstream-commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit upstream-commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (inputs
       ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of
       ;; the bpp packages with GCC 5.
       `(("gcc" ,gcc-5)))
      (arguments
       `(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
826
(define-public bpp-phyl
  ;; Built from a pinned commit of the upstream git repository: the last
  ;; release was in 2014 and cloning the repository is the recommended way
  ;; to install from source.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((upstream-commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
    (package
      (name "bpp-phyl")
      ;; The version carries the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take upstream-commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit upstream-commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ;; GCC 4.8 fails due to an 'internal compiler error', so we use a
         ;; more modern GCC.
         ("gcc" ,gcc-5)))
      (arguments
       ;; If out-of-source, test data is not copied into the build directory
       ;; so the tests fail.
       `(#:out-of-source? #f
         #:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
863
;; Package definition for the Bio++ population genetics library, fetched from
;; a pinned upstream git commit and built with the CMake build system.
864 (define-public bpp-popgen
865 ;; The last release was in 2014 and the recommended way to install from source
866 ;; is to clone the git repository, so we do this.
867 ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
868 (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
869 (package
870 (name "bpp-popgen")
;; The version string is "2.2.0-1." followed by the first seven characters
;; of the pinned commit.
871 (version (string-append "2.2.0-1." (string-take commit 7)))
872 (source (origin
873 (method git-fetch)
874 (uri (git-reference
875 (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
876 (commit commit)))
877 (file-name (string-append name "-" version "-checkout"))
878 (sha256
879 (base32
880 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
881 (build-system cmake-build-system)
882 (arguments
;; Parallel builds are switched off; the test phase is skipped.
883 `(#:parallel-build? #f
884 #:tests? #f)) ; There are no tests.
885 (inputs
;; Built against the sibling Bio++ libraries and with GCC 5, like the
;; other bpp-* packages in this file.
886 `(("bpp-core" ,bpp-core)
887 ("bpp-seq" ,bpp-seq)
888 ("gcc" ,gcc-5)))
889 (home-page "http://biopp.univ-montp2.fr")
890 (synopsis "Bio++ population genetics library")
891 (description
892 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
893 analysis, phylogenetics, molecular evolution and population genetics. This
894 library provides population genetics-related modules.")
895 (license license:cecill-c))))
896
;; Package definition for the Bio++ sequence library, fetched from a pinned
;; upstream git commit and built with the CMake build system.
897 (define-public bpp-seq
898 ;; The last release was in 2014 and the recommended way to install from source
899 ;; is to clone the git repository, so we do this.
900 ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
901 (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
902 (package
903 (name "bpp-seq")
;; The version string is "2.2.0-1." followed by the first seven characters
;; of the pinned commit.
904 (version (string-append "2.2.0-1." (string-take commit 7)))
905 (source (origin
906 (method git-fetch)
907 (uri (git-reference
908 (url "http://biopp.univ-montp2.fr/git/bpp-seq")
909 (commit commit)))
910 (file-name (string-append name "-" version "-checkout"))
911 (sha256
912 (base32
913 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
914 (build-system cmake-build-system)
915 (arguments
;; Parallel builds are switched off for this package.
916 `(#:parallel-build? #f
917 ;; If out-of-source, test data is not copied into the build directory
918 ;; so the tests fail.
919 #:out-of-source? #f))
920 (inputs
921 `(("bpp-core" ,bpp-core)
922 ("gcc" ,gcc-5))) ; Use GCC 5 as per 'bpp-core'.
923 (home-page "http://biopp.univ-montp2.fr")
924 (synopsis "Bio++ sequence library")
925 (description
926 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
927 analysis, phylogenetics, molecular evolution and population genetics. This
928 library provides sequence-related modules.")
929 (license license:cecill-c))))
930
;; Package definition for the Bio++ command line tool suite, fetched from a
;; pinned upstream git commit and built with the CMake build system against
;; the four Bio++ libraries packaged in this file.
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      ;; The version string is "2.2.0-1." followed by the first seven
      ;; characters of the pinned commit.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ;; Input labels must be unique: with a duplicated "bpp-phyl" label a
         ;; lookup by label can only ever find the first entry.
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
969
;; Package definition for NCBI BLAST+, built from the upstream source tarball
;; with the GNU build system.  The source snippet removes the bundled bzip2,
;; zlib and regexp (pcre) directories; the replacement 'configure' phase
;; points the build at the bzip2, zlib and pcre inputs instead.  The package
;; has three outputs: "out", "lib" and "include".
970 (define-public blast+
971 (package
972 (name "blast+")
973 (version "2.6.0")
974 (source (origin
975 (method url-fetch)
976 (uri (string-append
977 "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
978 version "/ncbi-blast-" version "+-src.tar.gz"))
979 (sha256
980 (base32
981 "15n937pw5aqmyfjb6l387d18grqbb96l63d5xj4l7yyh0zbf2405"))
982 (patches (search-patches "blast+-fix-makefile.patch"))
983 (modules '((guix build utils)))
984 (snippet
985 '(begin
986 ;; Remove bundled bzip2, zlib and pcre.
987 (delete-file-recursively "c++/src/util/compress/bzip2")
988 (delete-file-recursively "c++/src/util/compress/zlib")
989 (delete-file-recursively "c++/src/util/regexp")
;; Drop the deleted bzip2 and zlib directories from the list in the
;; compress Makefile template, keeping only "api".
990 (substitute* "c++/src/util/compress/Makefile.in"
991 (("bzip2 zlib api") "api"))
992 ;; Remove useless msbuild directory
993 (delete-file-recursively
994 "c++/src/build-system/project_tree_builder/msbuild")
995 #t))))
996 (build-system gnu-build-system)
997 (arguments
998 `(;; There are two(!) tests for this massive library, and both fail with
999 ;; "unparsable timing stats".
1000 ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
1001 ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
1002 #:tests? #f
1003 #:out-of-source? #t
1004 #:parallel-build? #f ; not supported
1005 #:phases
1006 (modify-phases %standard-phases
1007 (add-before
1008 'configure 'set-HOME
1009 ;; $HOME needs to be set at some point during the configure phase
1010 (lambda _ (setenv "HOME" "/tmp") #t))
;; All later phases run from the "c++" subdirectory of the unpacked source.
1011 (add-after
1012 'unpack 'enter-dir
1013 (lambda _ (chdir "c++") #t))
1014 (add-after
1015 'enter-dir 'fix-build-system
1016 (lambda _
;; Return the replacement text for the command name CMD: a fixed-epoch
;; "date" invocation for "date", otherwise the result of 'which' when it
;; finds the command, otherwise #f after printing a warning.
1017 (define (which* cmd)
1018 (cond ((string=? cmd "date")
1019 ;; make call to "date" deterministic
1020 "date -d @0")
1021 ((which cmd)
1022 => identity)
1023 (else
1024 (format (current-error-port)
1025 "WARNING: Unable to find absolute path for ~s~%"
1026 cmd)
1027 #f)))
1028
1029 ;; Rewrite hardcoded paths to various tools
1030 (substitute* (append '("src/build-system/configure.ac"
1031 "src/build-system/configure"
1032 "src/build-system/helpers/run_with_lock.c"
1033 "scripts/common/impl/if_diff.sh"
1034 "scripts/common/impl/run_with_lock.sh"
1035 "src/build-system/Makefile.configurables.real"
1036 "src/build-system/Makefile.in.top"
1037 "src/build-system/Makefile.meta.gmake=no"
1038 "src/build-system/Makefile.meta.in"
1039 "src/build-system/Makefile.meta_l"
1040 "src/build-system/Makefile.meta_p"
1041 "src/build-system/Makefile.meta_r"
1042 "src/build-system/Makefile.mk.in"
1043 "src/build-system/Makefile.requirements"
1044 "src/build-system/Makefile.rules_with_autodep.in")
1045 (find-files "scripts/common/check" "\\.sh$"))
;; When 'which*' returns #f the original matched text is kept unchanged.
1046 (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
1047 (or (which* cmd) all)))
1048
;; Point LN_S at the 'ln' found on PATH and drop lines that assign PATH
;; in the config* files.
1049 (substitute* (find-files "src/build-system" "^config.*")
1050 (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
1051 (("^PATH=.*") ""))
1052
1053 ;; rewrite "/var/tmp" in check script
1054 (substitute* "scripts/common/check/check_make_unix.sh"
1055 (("/var/tmp") "/tmp"))
1056
1057 ;; do not reset PATH
1058 (substitute* (find-files "scripts/common/impl/" "\\.sh$")
1059 (("^ *PATH=.*") "")
1060 (("action=/bin/") "action=")
1061 (("export PATH") ":"))
1062 #t))
1063 (replace
1064 'configure
1065 (lambda* (#:key inputs outputs #:allow-other-keys)
;; Executables go to "out", libraries to the "lib" output and headers to
;; the "include" output (under include/ncbi-tools++).
1066 (let ((out (assoc-ref outputs "out"))
1067 (lib (string-append (assoc-ref outputs "lib") "/lib"))
1068 (include (string-append (assoc-ref outputs "include")
1069 "/include/ncbi-tools++")))
1070 ;; The 'configure' script doesn't recognize things like
1071 ;; '--enable-fast-install'.
1072 (zero? (system* "./configure.orig"
1073 (string-append "--with-build-root=" (getcwd) "/build")
1074 (string-append "--prefix=" out)
1075 (string-append "--libdir=" lib)
1076 (string-append "--includedir=" include)
1077 (string-append "--with-bz2="
1078 (assoc-ref inputs "bzip2"))
1079 (string-append "--with-z="
1080 (assoc-ref inputs "zlib"))
1081 (string-append "--with-pcre="
1082 (assoc-ref inputs "pcre"))
1083 ;; Each library is built twice by default, once
1084 ;; with "-static" in its name, and again
1085 ;; without.
1086 "--without-static"
1087 "--with-dll"))))))))
1088 (outputs '("out" ; 21 MB
1089 "lib" ; 226 MB
1090 "include")) ; 33 MB
1091 (inputs
1092 `(("bzip2" ,bzip2)
1093 ("zlib" ,zlib)
1094 ("pcre" ,pcre)
1095 ("perl" ,perl)
1096 ("python" ,python-wrapper)))
1097 (native-inputs
1098 `(("cpio" ,cpio)))
1099 (home-page "http://blast.ncbi.nlm.nih.gov")
1100 (synopsis "Basic local alignment search tool")
1101 (description
1102 "BLAST is a popular method of performing a DNA or protein sequence
1103 similarity search, using heuristics to produce results quickly. It also
1104 calculates an “expect value” that estimates how many matches would have
1105 occurred at a given score by chance, which can aid a user in judging how much
1106 confidence to have in an alignment.")
1107 ;; Most of the sources are in the public domain, with the following
1108 ;; exceptions:
1109 ;; * Expat:
1110 ;; * ./c++/include/util/bitset/
1111 ;; * ./c++/src/html/ncbi_menu*.js
1112 ;; * Boost license:
1113 ;; * ./c++/include/util/impl/floating_point_comparison.hpp
1114 ;; * LGPL 2+:
1115 ;; * ./c++/include/dbapi/driver/odbc/unix_odbc/
1116 ;; * ASL 2.0:
1117 ;; * ./c++/src/corelib/teamcity_*
1118 (license (list license:public-domain
1119 license:expat
1120 license:boost1.0
1121 license:lgpl2.0+
1122 license:asl2.0))))
1123
;; Package definition for BLESS, built from the SourceForge tarball with the
;; GNU build system.  There is no configure script; the Makefile is driven
;; directly via make flags, and the 'install' phase copies the "bless" and
;; "kmc" executables into the output's bin directory.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, sparsehash, zlib, and .git
                  ;; directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; Point the Makefile's ZLIB variable at the zlib input's static
       ;; library and pass the link flags as a single LDFLAGS value.
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz directory is deleted by the source snippet, so
         ;; the Makefile must not try to build it.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code the locations of the helper executables: kmc is
         ;; installed by this package, pigz comes from the "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less memory
compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1208
;; Package definition named "bowtie"; the source is fetched from the
;; BenLangmead/bowtie2 repository archive.  It is built with the GNU build
;; system without a configure step, and the 'check' phase runs the upstream
;; Perl test script against the freshly built binaries.
1209 (define-public bowtie
1210 (package
1211 (name "bowtie")
1212 (version "2.3.2")
1213 (source (origin
1214 (method url-fetch)
1215 (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
1216 version ".tar.gz"))
1217 (file-name (string-append name "-" version ".tar.gz"))
1218 (sha256
1219 (base32
1220 "0hwa5r9qbglppb7sz5z79rlmmddr3n51n468jb3wh8rwjgn3yr90"))
1221 (modules '((guix build utils)))
1222 (snippet
1223 '(begin
1224 (substitute* "Makefile"
1225 ;; replace BUILD_HOST and BUILD_TIME for deterministic build
1226 (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
1227 (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
1228 #t))))
1229 (build-system gnu-build-system)
1230 (inputs
1231 `(("perl" ,perl)
1232 ("perl-clone" ,perl-clone)
1233 ("perl-test-deep" ,perl-test-deep)
1234 ("perl-test-simple" ,perl-test-simple)
1235 ("python" ,python-2)
1236 ("tbb" ,tbb)
1237 ("zlib" ,zlib)))
1238 (arguments
1239 '(#:make-flags
;; Build the "allall" make target with WITH_TBB=1 and install under the
;; "out" output.
1240 (list "allall"
1241 "WITH_TBB=1"
1242 (string-append "prefix=" (assoc-ref %outputs "out")))
1243 #:phases
1244 (modify-phases %standard-phases
1245 (delete 'configure)
1246 (replace 'check
1247 (lambda* (#:key outputs #:allow-other-keys)
;; The phase succeeds only when the test script exits with status 0.
1248 (zero? (system* "perl"
1249 "scripts/test/simple_tests.pl"
1250 "--bowtie2=./bowtie2"
1251 "--bowtie2-build=./bowtie2-build")))))))
1252 (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
1253 (synopsis "Fast and sensitive nucleotide sequence read aligner")
1254 (description
1255 "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
1256 reads to long reference sequences. It is particularly good at aligning reads
1257 of about 50 up to 100s or 1,000s of characters, and particularly good at
1258 aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
1259 genome with an FM Index to keep its memory footprint small: for the human
1260 genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
1261 gapped, local, and paired-end alignment modes.")
1262 (supported-systems '("x86_64-linux"))
1263 (license license:gpl3+)))
1264
;; Package definition for TopHat, built from the upstream tarball with the GNU
;; build system.  The bundled SeqAn and samtools sources are deleted by the
;; source snippet; the 'use-system-samtools' phase rewrites the build files
;; and sources to use the "samtools" input instead.
1265 (define-public tophat
1266 (package
1267 (name "tophat")
1268 (version "2.1.0")
1269 (source (origin
1270 (method url-fetch)
1271 (uri (string-append
1272 "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
1273 version ".tar.gz"))
1274 (sha256
1275 (base32
1276 "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
1277 (patches (search-patches "tophat-build-with-later-seqan.patch"))
1278 (modules '((guix build utils)))
1279 (snippet
1280 '(begin
1281 ;; Remove bundled SeqAn and samtools
1282 (delete-file-recursively "src/SeqAn-1.3")
1283 (delete-file-recursively "src/samtools-0.1.18")
1284 #t))))
1285 (build-system gnu-build-system)
1286 (arguments
1287 '(#:parallel-build? #f ; not supported
1288 #:phases
1289 (modify-phases %standard-phases
1290 (add-after 'unpack 'use-system-samtools
1291 (lambda* (#:key inputs #:allow-other-keys)
;; Strip every reference to the bundled samtools library and program
;; from the Makefile template.
1292 (substitute* "src/Makefile.in"
1293 (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
1294 (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
1295 (("SAMPROG = samtools_0\\.1\\.18") "")
1296 (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
1297 (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
;; Replace the name of the bundled samtools program with the path of the
;; "samtools" executable found on PATH.
;; NOTE(review): the dots in this pattern are not escaped, unlike in the
;; Makefile.in patterns above, so they match any character -- confirm
;; that this is intended.
1298 (substitute* '("src/common.cpp"
1299 "src/tophat.py")
1300 (("samtools_0.1.18") (which "samtools")))
;; Include the samtools headers from the "samtools/" include
;; subdirectory rather than from the local source tree.
1301 (substitute* '("src/common.h"
1302 "src/bam2fastx.cpp")
1303 (("#include \"bam.h\"") "#include <samtools/bam.h>")
1304 (("#include \"sam.h\"") "#include <samtools/sam.h>"))
1305 (substitute* '("src/bwt_map.h"
1306 "src/map2gtf.h"
1307 "src/align_status.h")
1308 (("#include <bam.h>") "#include <samtools/bam.h>")
1309 (("#include <sam.h>") "#include <samtools/sam.h>"))
1310 #t)))))
1311 (inputs
1312 `(("boost" ,boost)
1313 ("bowtie" ,bowtie)
1314 ("samtools" ,samtools-0.1)
1315 ("ncurses" ,ncurses)
1316 ("python" ,python-2)
1317 ("perl" ,perl)
1318 ("zlib" ,zlib)
1319 ("seqan" ,seqan)))
1320 (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
1321 (synopsis "Spliced read mapper for RNA-Seq data")
1322 (description
1323 "TopHat is a fast splice junction mapper for nucleotide sequence
1324 reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
1325 mammalian-sized genomes using the ultra high-throughput short read
1326 aligner Bowtie, and then analyzes the mapping results to identify
1327 splice junctions between exons.")
1328 ;; TopHat is released under the Boost Software License, Version 1.0
1329 ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
1330 (license license:boost1.0)))
1331
;; Package definition for BWA, built from the upstream release tarball with
;; the GNU build system.  There is no configure script and no "check" target;
;; the custom 'install' phase copies the executable, README and man page into
;; the output.
1332 (define-public bwa
1333 (package
1334 (name "bwa")
1335 (version "0.7.17")
1336 (source (origin
1337 (method url-fetch)
1338 (uri (string-append
1339 "https://github.com/lh3/bwa/releases/download/v"
1340 version "/bwa-" version ".tar.bz2"))
1341 (sha256
1342 (base32
1343 "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
1344 (build-system gnu-build-system)
1345 (arguments
1346 '(#:tests? #f ;no "check" target
1347 #:phases
1348 (modify-phases %standard-phases
1349 (replace 'install
1350 (lambda* (#:key outputs #:allow-other-keys)
;; Destination directories, all below the "out" output.
1351 (let ((bin (string-append
1352 (assoc-ref outputs "out") "/bin"))
1353 (doc (string-append
1354 (assoc-ref outputs "out") "/share/doc/bwa"))
1355 (man (string-append
1356 (assoc-ref outputs "out") "/share/man/man1")))
1357 (install-file "bwa" bin)
1358 (install-file "README.md" doc)
1359 (install-file "bwa.1" man))
1360 #t))
1361 ;; no "configure" script
1362 (delete 'configure))))
1363 (inputs `(("zlib" ,zlib)))
1364 ;; Non-portable SSE instructions are used so building fails on platforms
1365 ;; other than x86_64.
1366 (supported-systems '("x86_64-linux"))
1367 (home-page "http://bio-bwa.sourceforge.net/")
1368 (synopsis "Burrows-Wheeler sequence aligner")
1369 (description
1370 "BWA is a software package for mapping low-divergent sequences against a
1371 large reference genome, such as the human genome. It consists of three
1372 algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
1373 designed for Illumina sequence reads up to 100bp, while the rest two for
1374 longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
1375 features such as long-read support and split alignment, but BWA-MEM, which is
1376 the latest, is generally recommended for high-quality queries as it is faster
1377 and more accurate. BWA-MEM also has better performance than BWA-backtrack for
1378 70-100bp Illumina reads.")
1379 (license license:gpl3+)))
1380
;; Package definition for BWA-PSSM.  It inherits from the 'bwa' package, so
;; every field not overridden here (including the build arguments and the
;; supported systems) is taken from 'bwa'.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              ;; The archive URL ends in just the version, so give the
              ;; download a name that includes the package name.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1409
;; Package definition for bx-python, fetched from PyPI and built with the
;; Python build system using Python 2.  Tests are disabled.
1410 (define-public python2-bx-python
1411 (package
1412 (name "python2-bx-python")
1413 (version "0.7.3")
1414 (source (origin
1415 (method url-fetch)
1416 (uri (pypi-uri "bx-python" version))
1417 (sha256
1418 (base32
1419 "15z2w3bvnc0n4qmb9bd6d8ylc2h2nj883x2w9iixf4x3vki9b22i"))
1420 (modules '((guix build utils)))
1421 (snippet
1422 '(begin
;; Blank out the two setup.py lines that import and call use_setuptools.
1423 (substitute* "setup.py"
1424 ;; remove dependency on outdated "distribute" module
1425 (("^from distribute_setup import use_setuptools") "")
1426 (("^use_setuptools\\(\\)") ""))
1427 #t))))
1428 (build-system python-build-system)
1429 (arguments
1430 `(#:tests? #f ;tests fail because test data are not included
1431 #:python ,python-2))
1432 (inputs
1433 `(("python-numpy" ,python2-numpy)
1434 ("zlib" ,zlib)))
1435 (native-inputs
1436 `(("python-nose" ,python2-nose)))
1437 (home-page "http://bitbucket.org/james_taylor/bx-python/")
1438 (synopsis "Tools for manipulating biological data")
1439 (description
1440 "bx-python provides tools for manipulating biological data, particularly
1441 multiple sequence alignments.")
1442 (license license:expat)))
1443
;; Package definition for pysam, built from the GitHub release archive with
;; the Python build system.  The bundled htslib is deleted and the build is
;; pointed at the "htslib" input through environment variables.  The custom
;; 'check' phase currently only builds the test data; the nosetests call is
;; commented out.
1444 (define-public python-pysam
1445 (package
1446 (name "python-pysam")
1447 (version "0.13.0")
1448 (source (origin
1449 (method url-fetch)
1450 ;; Test data is missing on PyPi.
1451 (uri (string-append
1452 "https://github.com/pysam-developers/pysam/archive/v"
1453 version ".tar.gz"))
1454 (file-name (string-append name "-" version ".tar.gz"))
1455 (sha256
1456 (base32
1457 "0dzap2axin9cbbl0d825w294bpn00zagfm1sigamm4v2pm5bj9lp"))
1458 (modules '((guix build utils)))
1459 (snippet '(begin
1460 ;; Drop bundled htslib. TODO: Also remove samtools
1461 ;; and bcftools.
1462 (delete-file-recursively "htslib")
1463 #t))))
1464 (build-system python-build-system)
1465 (arguments
;; Extra build-side modules: (ice-9 ftw) provides 'scandir' and
;; (srfi srfi-26) provides 'cut', both used in the 'check' phase.
1466 `(#:modules ((ice-9 ftw)
1467 (srfi srfi-26)
1468 (guix build python-build-system)
1469 (guix build utils))
1470 #:phases
1471 (modify-phases %standard-phases
1472 (add-before 'build 'set-flags
1473 (lambda* (#:key inputs #:allow-other-keys)
;; Use the htslib input's library and header directories, and set the
;; ncurses-related linker and compiler flags.
1474 (setenv "HTSLIB_MODE" "external")
1475 (setenv "HTSLIB_LIBRARY_DIR"
1476 (string-append (assoc-ref inputs "htslib") "/lib"))
1477 (setenv "HTSLIB_INCLUDE_DIR"
1478 (string-append (assoc-ref inputs "htslib") "/include"))
1479 (setenv "LDFLAGS" "-lncurses")
1480 (setenv "CFLAGS" "-D_CURSES_LIB=1")
1481 #t))
1482 (replace 'check
1483 (lambda* (#:key inputs outputs #:allow-other-keys)
1484 ;; Add first subdirectory of "build" directory to PYTHONPATH.
1485 (setenv "PYTHONPATH"
1486 (string-append
1487 (getenv "PYTHONPATH")
1488 ":" (getcwd) "/build/"
1489 (car (scandir "build"
1490 (negate (cut string-prefix? "." <>))))))
1491 ;; Step out of source dir so python does not import from CWD.
1492 (with-directory-excursion "tests"
1493 (setenv "HOME" "/tmp")
;; Both test-data builds must exit with status 0 for the phase to succeed.
1494 (and (zero? (system* "make" "-C" "pysam_data"))
1495 (zero? (system* "make" "-C" "cbcf_data"))
1496 ;; Running nosetests without explicitly asking for a
1497 ;; single process leads to a crash. Running with multiple
1498 ;; processes fails because the tests are not designed to
1499 ;; run in parallel.
1500
1501 ;; FIXME: tests keep timing out on some systems.
1502 ;; (zero? (system* "nosetests" "-v"
1503 ;; "--processes" "1"))
1504 )))))))
1505 (propagated-inputs
1506 `(("htslib" ,htslib))) ; Included from installed header files.
1507 (inputs
1508 `(("ncurses" ,ncurses)
1509 ("zlib" ,zlib)))
1510 (native-inputs
1511 `(("python-cython" ,python-cython)
1512 ;; Dependencies below are for tests only.
1513 ("samtools" ,samtools)
1514 ("bcftools" ,bcftools)
1515 ("python-nose" ,python-nose)))
1516 (home-page "https://github.com/pysam-developers/pysam")
1517 (synopsis "Python bindings to the SAMtools C API")
1518 (description
1519 "Pysam is a Python module for reading and manipulating files in the
1520 SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
1521 also includes an interface for tabix.")
1522 (license license:expat)))
1523
;; Python 2 variant of 'python-pysam', derived with 'package-with-python2'.
1524 (define-public python2-pysam
1525 (package-with-python2 python-pysam))
1526
;; Package definition for twobitreader, fetched from PyPI and built with the
;; Python build system.  Tests are disabled.
1527 (define-public python-twobitreader
1528 (package
1529 (name "python-twobitreader")
1530 (version "3.1.4")
1531 (source (origin
1532 (method url-fetch)
1533 (uri (pypi-uri "twobitreader" version))
1534 (sha256
1535 (base32
1536 "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
1537 (build-system python-build-system)
1538 (arguments
1539 '(;; Tests are not distributed in the PyPi release.
1540 ;; TODO Try building from the Git repo or asking the upstream maintainer
1541 ;; to distribute the tests on PyPi.
1542 #:tests? #f))
1543 (native-inputs
1544 `(("python-sphinx" ,python-sphinx)))
1545 (home-page "https://github.com/benjschiller/twobitreader")
1546 (synopsis "Python library for reading .2bit files")
1547 (description
1548 "twobitreader is a Python library for reading .2bit files as used by the
1549 UCSC genome browser.")
1550 (license license:artistic2.0)))
1551
;; Python 2 variant of 'python-twobitreader', derived with
;; 'package-with-python2'.
1552 (define-public python2-twobitreader
1553 (package-with-python2 python-twobitreader))
1554
;; Package definition for plastid, fetched from PyPI and built with the
;; Python build system.  Tests are disabled; the Python libraries it needs
;; are declared as propagated inputs.
1555 (define-public python-plastid
1556 (package
1557 (name "python-plastid")
1558 (version "0.4.8")
1559 (source (origin
1560 (method url-fetch)
1561 (uri (pypi-uri "plastid" version))
1562 (sha256
1563 (base32
1564 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
1565 (build-system python-build-system)
1566 (arguments
1567 ;; Some test files are not included.
1568 `(#:tests? #f))
1569 (propagated-inputs
1570 `(("python-numpy" ,python-numpy)
1571 ("python-scipy" ,python-scipy)
1572 ("python-pandas" ,python-pandas)
1573 ("python-pysam" ,python-pysam)
1574 ("python-matplotlib" ,python-matplotlib)
1575 ("python-biopython" ,python-biopython)
1576 ("python-twobitreader" ,python-twobitreader)
1577 ("python-termcolor" ,python-termcolor)))
1578 (native-inputs
1579 `(("python-cython" ,python-cython)
1580 ("python-nose" ,python-nose)))
1581 (home-page "https://github.com/joshuagryphon/plastid")
1582 (synopsis "Python library for genomic analysis")
1583 (description
1584 "plastid is a Python library for genomic analysis – in particular,
1585 high-throughput sequencing data – with an emphasis on simplicity.")
1586 (license license:bsd-3)))
1587
;; Python 2 variant of 'python-plastid', derived with 'package-with-python2'.
1588 (define-public python2-plastid
1589 (package-with-python2 python-plastid))
1590
;; Package definition for CD-HIT, built from the upstream release tarball
;; with the GNU build system.  There is no configure script and there are no
;; tests; executables are installed into the output's bin directory via the
;; PREFIX make flag.
1591 (define-public cd-hit
1592 (package
1593 (name "cd-hit")
1594 (version "4.6.8")
1595 (source (origin
1596 (method url-fetch)
1597 (uri (string-append "https://github.com/weizhongli/cdhit"
1598 "/releases/download/V" version
1599 "/cd-hit-v" version
1600 "-2017-0621-source.tar.gz"))
1601 (sha256
1602 (base32
1603 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
1604 (build-system gnu-build-system)
1605 (arguments
1606 `(#:tests? #f ; there are no tests
1607 #:make-flags
1608 ;; Executables are copied directly to the PREFIX.
1609 (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
1610 #:phases
1611 (modify-phases %standard-phases
1612 ;; No "configure" script
1613 (delete 'configure)
1614 ;; Remove sources of non-determinism
1615 (add-after 'unpack 'be-timeless
1616 (lambda _
;; Drop the __DATE__ and __TIME__ uses from the two source files.
1617 (substitute* "cdhit-utility.c++"
1618 ((" \\(built on \" __DATE__ \"\\)") ""))
1619 (substitute* "cdhit-common.c++"
1620 (("__DATE__") "\"0\"")
1621 (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
1622 #t))
1623 ;; The "install" target does not create the target directory.
1624 (add-before 'install 'create-target-dir
1625 (lambda* (#:key outputs #:allow-other-keys)
1626 (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
1627 #t)))))
1628 (inputs
1629 `(("perl" ,perl)))
1630 (home-page "http://weizhongli-lab.org/cd-hit/")
1631 (synopsis "Cluster and compare protein or nucleotide sequences")
1632 (description
1633 "CD-HIT is a program for clustering and comparing protein or nucleotide
1634 sequences. CD-HIT is designed to be fast and handle extremely large
1635 databases.")
1636 ;; The manual says: "It can be copied under the GNU General Public License
1637 ;; version 2 (GPLv2)."
1638 (license license:gpl2)))
1639
;; Package definition for CLIPper, built from the GitHub archive with the
;; Python build system using Python 2.
1640 (define-public clipper
1641 (package
1642 (name "clipper")
1643 (version "1.1")
1644 (source (origin
1645 (method url-fetch)
1646 (uri (string-append
1647 "https://github.com/YeoLab/clipper/archive/"
1648 version ".tar.gz"))
1649 (file-name (string-append name "-" version ".tar.gz"))
1650 (sha256
1651 (base32
1652 "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
1653 (modules '((guix build utils)))
1654 (snippet
1655 '(begin
1656 ;; remove unnecessary setup dependency
1657 (substitute* "setup.py"
1658 (("setup_requires = .*") ""))
;; Delete the two .so files and the dist/ directory shipped in the
;; archive (presumably prebuilt artifacts -- confirm against upstream).
1659 (for-each delete-file
1660 '("clipper/src/peaks.so"
1661 "clipper/src/readsToWiggle.so"))
1662 (delete-file-recursively "dist/")
1663 #t))))
1664 (build-system python-build-system)
1665 (arguments `(#:python ,python-2)) ; only Python 2 is supported
1666 (inputs
1667 `(("htseq" ,python2-htseq)
1668 ("python-pybedtools" ,python2-pybedtools)
1669 ("python-cython" ,python2-cython)
1670 ("python-scikit-learn" ,python2-scikit-learn)
1671 ("python-matplotlib" ,python2-matplotlib)
1672 ("python-pandas" ,python2-pandas)
1673 ("python-pysam" ,python2-pysam)
1674 ("python-numpy" ,python2-numpy)
1675 ("python-scipy" ,python2-scipy)))
1676 (native-inputs
1677 `(("python-mock" ,python2-mock) ; for tests
1678 ("python-nose" ,python2-nose) ; for tests
1679 ("python-pytz" ,python2-pytz))) ; for tests
1680 (home-page "https://github.com/YeoLab/clipper")
1681 (synopsis "CLIP peak enrichment recognition")
1682 (description
1683 "CLIPper is a tool to define peaks in CLIP-seq datasets.")
1684 (license license:gpl2)))
1685
;; Package definition for CodingQuarry, built from the SourceForge tarball
;; with the GNU build system.  There is no configure script and no "check"
;; target; the custom 'install' phase copies the documentation, the
;; "QuarryFiles" data directory, the executable and the helper script into
;; the output.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin))
             ;; Return #t explicitly, like every other phase in this file,
             ;; instead of the unspecified value of 'install-file'.
             #t)))))
    (inputs `(("openmpi" ,openmpi)))
    ;; QUARRY_PATH is a search path over the "QuarryFiles" directory that the
    ;; 'install' phase places at the top of the output.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
1725
;; COUGER is shipped as a zip archive.  Nothing is compiled: the 'configure'
;; and 'build' phases are dropped, the files are copied into place and the
;; launcher script is wrapped so that it sees the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://couger.oit.duke.edu/static/assets/COUGER"
                    version ".zip"))
              (sha256
               (base32
                "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; Copying "src" also creates OUT, so BIN can be made with a
               ;; plain 'mkdir' afterwards.
               (copy-recursively "src" (string-append out "/src"))
               (mkdir bin)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (install-file "couger" bin)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((launcher (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
1796
;; Clustal Omega: built with the unmodified GNU build system phases.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
1819
;; CrossMap: a Python 2 package.  The bundled pysam copy is removed from the
;; source and the build is told to use the system pysam instead.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
       ;; This patch has been sent upstream already and is available
       ;; for download from Sourceforge, but it has not been merged.
       (patches (search-patches "crossmap-allow-system-pysam.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set the environment variable right after unpacking so that it is
         ;; in effect for all later phases.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1860
;; Cutadapt: fetched from the upstream Git repository at the tag
;; "v" + VERSION and built with the Python build system.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.16")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marcelm/cutadapt.git")
             (commit (string-append "v" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "09pr02067jiks19nc0aby4xp70hhgvb554i2y1c04rv1m401w7q8"))))
    (build-system python-build-system)
    (inputs
     `(("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1886
;; libBigWig: there is no configure step; the compiler and the installation
;; prefix are handed to make as flags.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "0h2smg24v5srdcqzrmz2g23cmlp4va465mgx8r2z571sfz8pv454"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f ; tests require access to the web
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for the tests.
       ("python" ,python-2)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
1923
;; pyBigWig: the bundled libBigWig sources are deleted from the tarball and
;; "setup.py" is patched to link against the packaged library instead.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.12")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "00w4kfnm2c5l7wdwr2nj1z5djv8kzgf7h1zhsgv6njff1rwr26g0"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             ;; Prepend "BigWig" to the list of libraries to link with.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
1961
;; Python 2 variant of python-pybigwig.
(define-public python2-pybigwig (package-with-python2 python-pybigwig))
1964
;; DendroPy: built from the GitHub release archive rather than from PyPI.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source (origin
              (method url-fetch)
              ;; Source from GitHub so that tests are included.
              (uri (string-append
                    "https://github.com/jeetsukumaran/DendroPy/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0v2fccny5xjaah546bsch1mw4kh61qq5frz2ibllxs9mp6ih9bsn"))))
    (build-system python-build-system)
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
1988
;; Python 2 variant of python-dendropy.
(define-public python2-dendropy (package-with-python2 python-dendropy))
1991
;; py2bit: fetched from PyPI and built with the default Python phases.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "py2bit" version))
              (sha256
               (base32
                "1cdf4qlmgwsh1f4k0wdv2sr8x9qn4366p0k3614vbd0fpqiarxrl"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2010
;; deepTools: built from the GitHub release archive with the default Python
;; build phases.
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/deeptools/deepTools/"
                           "archive/" version ".tar.gz"))
       ;; The archive is stored under the package name and version.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1q8i12l2gvk4n2s8lhyzwhh9g4qbc8lrk5l7maz00yvd5g6z5540"))))
    (build-system python-build-system)
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)))
    ;; All native inputs are only needed for the tests.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-pytz" ,python-pytz)))
    (home-page "https://github.com/deeptools/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2048
;; Delly: there is no configure step and no test suite; the three programs
;; and the exclusion templates are installed by hand.
(define-public delly
  (package
    (name "delly")
    (version "0.7.7")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/tobiasrausch/delly/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "0dkwy3pyxmi6dhh1lpsr3698ri5sslw9qz67hfys0bz8dgrqwabj"))
              (patches (search-patches "delly-use-system-libraries.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests to run.
       #:make-flags '("PARALLEL=1") ; Allow parallel execution at run-time.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda _
             (let ((bin (string-append (assoc-ref %outputs "out") "/bin"))
                   (templates (string-append (assoc-ref %outputs "out")
                                             "/share/delly/templates")))
               (mkdir-p bin)
               (mkdir-p templates)
               (copy-recursively "excludeTemplates" templates)
               (install-file "src/cov" bin)
               (install-file "src/delly" bin)
               (install-file "src/dpe" bin)
               ;; Build phases must return #t on success; 'install-file'
               ;; returns an unspecified value.
               #t))))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (home-page "https://github.com/tobiasrausch/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2095
;; DIAMOND: built with CMake.  The "-march=native" flag is stripped from
;; "CMakeLists.txt" before configuring.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.22")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0adp87r9ak63frdrdmrdfhsn6g0jnnyq1lr2wibvqbxcl37iir9m"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
2129
;; Discrover: built with CMake.  Two headers are patched to include <random>
;; right after their include guards.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/maaskola/discrover/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'add-missing-includes
           (lambda _
             ;; GUARD is bound to the matched "#define" line, which is kept
             ;; and followed by the extra include.
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
2168
;; EIGENSOFT: built from a fixed Git commit.  The version string is
;; "6.1.2-" REVISION "." followed by the first nine characters of COMMIT.
(define-public eigensoft
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-"
                              revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32
           "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Remove pre-built binaries.
         (snippet
          '(begin
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests.
         #:make-flags '("CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 ;; The working directory is still "src" here, hence the
                 ;; "../bin" path.
                 (for-each (lambda (program)
                             (install-file program target))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
2236
;; Entrez Direct: a single Perl script.  Nothing is configured or built; the
;; script is installed and wrapped so that it sees the build-time PERL5LIB.
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                                  "versions/2016-05-03/edirect.tar.gz"))
              (sha256
               (base32
                "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path))))
             ;; Return #t explicitly, like the other phases in this file.
             #t)))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2301
;; Exonerate: standard GNU build, with parallel builds switched off.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2331
;; eXpress: built with CMake.  The CMake files are patched to use shared
;; Boost and protobuf libraries and to find bamtools among the inputs.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so"))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2376
;; Express Beta Diversity: the build and the check run inside the "source"
;; sub-directory; installation happens from the top-level directory again.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; The check succeeds when the freshly built program exits with
         ;; status zero on "-u".
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity"
                             "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               (install-file "scripts/convertToEBD.py" bin)
               (install-file "bin/ExpressBetaDiversity" bin)
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2419
;; FastTree: the source is a single C file, so there is nothing to unpack or
;; configure.  It is compiled twice: once as "FastTree" and once with the
;; OpenMP flags as "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; COMPILE runs gcc on SOURCE, placing EXTRA-FLAGS in front of
             ;; the common flags, and returns #t when gcc exits with zero.
             (let ((compile
                    (lambda (program . extra-flags)
                      (zero? (apply system* "gcc"
                                    (append extra-flags
                                            (list "-O3"
                                                  "-finline-functions"
                                                  "-funroll-loops"
                                                  "-Wall"
                                                  "-o"
                                                  program
                                                  source
                                                  "-lm")))))))
               ;; The second build is only attempted if the first succeeds.
               (and (compile "FastTree")
                    (compile "FastTreeMP" "-DOPENMP" "-fopenmp")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2476
;; FASTX-Toolkit: built from the upstream release tarball with the
;; unmodified GNU build system phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
2508
;; Flexbar: FLEXBAR_BINARY_DIR points into the output's "bin" directory and
;; the 'install' phase is deleted.  The custom 'check' phase puts that
;; directory on PATH before running the validation script.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; Derive the source directory name from the package version
             ;; instead of hard-coding it, so that a version bump does not
             ;; silently break this phase.
             (chdir (string-append "../flexbar_v" ,version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2553
2554 (define-public fraggenescan
2555 (package
2556 (name "fraggenescan")
2557 (version "1.30")
2558 (source
2559 (origin
2560 (method url-fetch)
2561 (uri
2562 (string-append "mirror://sourceforge/fraggenescan/"
2563 "FragGeneScan" version ".tar.gz"))
2564 (sha256
2565 (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
2566 (build-system gnu-build-system)
2567 (arguments
2568 `(#:phases
2569 (modify-phases %standard-phases
2570 (delete 'configure)
2571 (add-before 'build 'patch-paths
2572 (lambda* (#:key outputs #:allow-other-keys)
2573 (let* ((out (string-append (assoc-ref outputs "out")))
2574 (share (string-append out "/share/fraggenescan/")))
2575 (substitute* "run_FragGeneScan.pl"
2576 (("system\\(\"rm")
2577 (string-append "system(\"" (which "rm")))
2578 (("system\\(\"mv")
2579 (string-append "system(\"" (which "mv")))
2580 (("\\\"awk") (string-append "\"" (which "awk")))
2581 ;; This script and other programs expect the training files
2582 ;; to be in the non-standard location bin/train/XXX. Change
2583 ;; this to be share/fraggenescan/train/XXX instead.
2584 (("^\\$train.file = \\$dir.*")
2585 (string-append "$train_file = \""
2586 share
2587 "train/\".$FGS_train_file;")))
2588 (substitute* "run_hmm.c"
2589 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
2590 (string-append " strcpy(train_dir, \"" share "/train/\");"))))
2591 #t))
2592 (replace 'build
2593 (lambda _ (and (zero? (system* "make" "clean"))
2594 (zero? (system* "make" "fgs")))))
2595 (replace 'install
2596 (lambda* (#:key outputs #:allow-other-keys)
2597 (let* ((out (string-append (assoc-ref outputs "out")))
2598 (bin (string-append out "/bin/"))
2599 (share (string-append out "/share/fraggenescan/train")))
2600 (install-file "run_FragGeneScan.pl" bin)
2601 (install-file "FragGeneScan" bin)
2602 (copy-recursively "train" share))))
2603 (delete 'check)
2604 (add-after 'install 'post-install-check
2605 ;; In lieu of 'make check', run one of the examples and check the
2606 ;; output files gets created.
2607 (lambda* (#:key outputs #:allow-other-keys)
2608 (let* ((out (string-append (assoc-ref outputs "out")))
2609 (bin (string-append out "/bin/"))
2610 (frag (string-append bin "run_FragGeneScan.pl")))
2611 (and (zero? (system* frag ; Test complete genome.
2612 "-genome=./example/NC_000913.fna"
2613 "-out=./test2"
2614 "-complete=1"
2615 "-train=complete"))
2616 (file-exists? "test2.faa")
2617 (file-exists? "test2.ffn")
2618 (file-exists? "test2.gff")
2619 (file-exists? "test2.out")
2620 (zero? (system* ; Test incomplete sequences.
2621 frag
2622 "-genome=./example/NC_000913-fgs.ffn"
2623 "-out=out"
2624 "-complete=0"
2625 "-train=454_30")))))))))
2626 (inputs
2627 `(("perl" ,perl)
2628 ("python" ,python-2))) ;not compatible with python 3.
2629 (home-page "https://sourceforge.net/projects/fraggenescan/")
2630 (synopsis "Finds potentially fragmented genes in short reads")
2631 (description
2632 "FragGeneScan is a program for predicting bacterial and archaeal genes in
2633 short and error-prone DNA sequencing reads. It can also be applied to predict
2634 genes in incomplete assemblies or complete genomes.")
2635 ;; GPL3+ according to private correspondence with the authors.
2636 (license license:gpl3+)))
2637
(define-public fxtract
  ;; Commit of the separately fetched "util" sources that are copied into the
  ;; fxtract source tree before building.
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; The upstream archive is named after the version only, so give the
         ;; tarball a name that identifies this package.  (It used to carry
         ;; the name of the "util" checkout by mistake.)
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Replace the "util" directory of the unpacked sources with the
           ;; separately fetched checkout.  'rmdir' only succeeds on an
           ;; empty directory, so the tarball is expected to ship it empty.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence.  It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required.  By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2704
(define-public gemma
  (package
    (name "gemma")
    (version "0.96")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/xiangzhou/GEMMA/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "055ynn16gd12pf78n4vr2a9jlwsbwzajpdnf2y2yilg1krfff222"))
       (patches (search-patches "gemma-intel-compat.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included yet
       ;; Dynamic linking is requested on every system; the remaining flag
       ;; depends on the system being built for.
       #:make-flags
       '("FORCE_DYNAMIC=1"
         ,@(match (%current-system)
             ("x86_64-linux" '())
             ("i686-linux" '("FORCE_32BIT=1"))
             (_ '("NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the "bin" directory ahead of the build phase.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         ;; Install the single executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "bin/gemma"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("zlib" ,zlib)))
    (home-page "https://github.com/xiangzhou/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (license license:gpl3)))
2754
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((numpy-include
                    (string-append
                     (assoc-ref inputs "python-numpy")
                     "/lib/python2.7/site-packages/numpy/core/include/")))
               ;; Remove the shipped C files so that they are generated anew
               ;; from the pyx sources.
               (for-each delete-file
                         '("grit/sparsify_support_fns.c"
                           "grit/call_peaks_support_fns.c"))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Compilation needs the numpy headers on the include path.
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-include
                                 "']")))
               #t))))))
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models.  When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites.  In
addition, GRIT can merge in reference junctions and gene boundaries.  GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2804
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; `popcnt' instructions are only supported on x86_64;
                      ;; turn them off everywhere else.
                      ,@(let ((system (or (%current-target-system)
                                          (%current-system))))
                          (if (string-prefix? "x86_64" system)
                              '()
                              '("POPCNT_CAPABILITY=0"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; Fixed BUILD_HOST and BUILD_TIME values keep the build
               ;; deterministic.
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         ;; Copy every hisat executable found in the build tree to "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                    (programs
                     (find-files
                      "."
                      "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads.  In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome.  These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2871
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       ;; FIXME: a better source URL is
       ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
       ;;                "/downloads/hisat2-" version "-source.zip")
       ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
       ;; but it is currently unavailable.
       (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:tests? #f                      ; no check target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Substitute a constant for the `date` invocation in the Makefile.
         (add-after 'unpack 'make-deterministic
           (lambda _
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         ;; Install the executables and the HTML manual by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bindir) programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome).  In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome.  These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads.  This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later.  The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
2934
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is "hmmer" followed by the major version.
       (uri (string-append "http://eddylab.org/software/hmmer"
                           (version-major version) "/" version
                           "/hmmer-" version ".tar.gz"))
       (patches (search-patches "hmmer-remove-cpu-specificity.patch"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments.  It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license
     (list license:gpl3+
           ;; The bundled 'easel' library comes under The Janelia Farm
           ;; Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
2965
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32
         "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Using htseq as a Python library requires numpy, hence it is
    ;; propagated.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
2991
;; Variant of the htseq package obtained by passing it through
;; 'package-with-python2'.
2992 (define-public python2-htseq
2993 (package-with-python2 htseq))
2994
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1ibhzzxsfc38nqyk9r8zqj6blfc1kh26iirypd4q6n90hs2m6nyq"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries, but keep an empty "lib"
               ;; directory in place.
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Internet access
       #:jdk ,icedtea-8
       ;; Make ant write the jars straight into the output directory.
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data.  There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3044
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         ;; Drop the build.xml shipped with the sources right after
         ;; unpacking.
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data.  There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3087
;; Picard 2.10.3 needs this particular htsjdk release.
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "htsjdk.jar"
       #:jdk ,icedtea-8
       #:tests? #f                      ; tests require Scala
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The scalatest package would be needed to run the tests.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
3114
;; This version goes together with java-htsjdk 2.3.0.  Later versions need a
;; more recent java-htsjdk as well, which in turn depends on gradle.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32
         "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" target-start)
              (string-append target-start "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             ,(string-append "-Dhtsjdk-version="
                             (package-version java-htsjdk)))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         ;; Point build.xml at the jars of the java-htsjdk input.
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk-jars (string-append (assoc-ref inputs "java-htsjdk")
                                               "/share/java/htsjdk/")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib") htsjdk-jars))
               #t)))
         ;; Drop "compile" from the dependencies of the "test" target.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats.  Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3194
3195 ;; This is needed for dropseq-tools
;; Picard 2.10.3, built with ant-build-system into "picard.jar".  A custom
;; phase rewrites build.xml so that the jar manifest gains a Class-Path entry
;; pointing at the htsjdk jar of the "java-htsjdk" input.
3196 (define-public java-picard-2.10.3
3197 (package
3198 (name "java-picard")
3199 (version "2.10.3")
3200 (source (origin
3201 (method git-fetch)
3202 (uri (git-reference
3203 (url "https://github.com/broadinstitute/picard.git")
3204 (commit version)))
3205 (file-name (string-append "java-picard-" version "-checkout"))
3206 (sha256
3207 (base32
3208 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
3209 (build-system ant-build-system)
3210 (arguments
3211 `(#:jar-name "picard.jar"
3212 ;; Tests require jacoco:coverage.
3213 #:tests? #f
3214 #:jdk ,icedtea-8
3215 #:main-class "picard.cmdline.PicardCommandLine"
;; The sxml modules provide the XML reading, transformation and writing
;; procedures used by the 'edit-classpath-in-manifest phase below.
3216 #:modules ((guix build ant-build-system)
3217 (guix build utils)
3218 (guix build java-utils)
3219 (sxml simple)
3220 (sxml transform)
3221 (sxml xpath))
3222 #:phases
3223 (modify-phases %standard-phases
3224 ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
3225 (delete 'generate-jar-indices)
;; Delete the build.xml shipped with the sources.
;; NOTE(review): the later phase edits a "build.xml" again, so one is
;; presumably generated by ant-build-system from #:jar-name in between;
;; confirm.
3226 (add-after 'unpack 'remove-useless-build.xml
3227 (lambda _ (delete-file "build.xml") #t))
3228 ;; This is necessary to ensure that htsjdk is found when using
3229 ;; picard.jar as an executable.
3230 (add-before 'build 'edit-classpath-in-manifest
3231 (lambda* (#:key inputs #:allow-other-keys)
3232 (chmod "build.xml" #o664)
;; Parse build.xml into SXML, rewrite it, and serialize the result to
;; "build.xml.new".
3233 (call-with-output-file "build.xml.new"
3234 (lambda (port)
3235 (sxml->xml
3236 (pre-post-order
3237 (with-input-from-file "build.xml"
3238 (lambda _ (xml->sxml #:trim-whitespace? #t)))
;; Handler for <target> elements: NAME is the text of the element's
;; "name" attribute and MSG the Class-Path text appended to the manifest.
3239 `((target . ,(lambda (tag . kids)
3240 (let ((name ((sxpath '(name *text*))
3241 (car kids)))
3242 ;; FIXME: We're breaking the line
3243 ;; early with a dummy path to
3244 ;; ensure that the store reference
3245 ;; isn't broken apart and can still
3246 ;; be found by the reference
3247 ;; scanner.
3248 (msg (format #f
3249 "\
3250 Class-Path: /~a \
3251 ~a/share/java/htsjdk.jar${line.separator}${line.separator}"
3252 ;; maximum line length is 70
3253 (string-tabulate (const #\b) 57)
3254 (assoc-ref inputs "java-htsjdk"))))
;; Only the target named "manifest" is extended: one task collapses the
;; trailing "\r\n\r\n" of the manifest file into a single line separator,
;; and an <echo> task then appends MSG to that file.  All other targets are
;; returned unchanged.
3255 (if (member "manifest" name)
3256 `(,tag ,@kids
3257 (replaceregexp
3258 (@ (file "${manifest.file}")
3259 (match "\\r\\n\\r\\n")
3260 (replace "${line.separator}")))
3261 (echo
3262 (@ (message ,msg)
3263 (file "${manifest.file}")
3264 (append "true"))))
3265 `(,tag ,@kids)))))
;; Every other element and all text nodes are copied through as they are.
3266 (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
3267 (*text* . ,(lambda (_ txt) txt))))
3268 port)))
;; Put the rewritten file in place of the original build.xml.
3269 (rename-file "build.xml.new" "build.xml")
3270 #t)))))
3271 (propagated-inputs
3272 `(("java-htsjdk" ,java-htsjdk-2.10.1)))
3273 (native-inputs
3274 `(("java-testng" ,java-testng)
3275 ("java-guava" ,java-guava)))
3276 (home-page "http://broadinstitute.github.io/picard/")
3277 (synopsis "Tools for manipulating high-throughput sequencing data and formats")
3278 (description "Picard is a set of Java command line tools for manipulating
3279 high-throughput sequencing (HTS) data and formats. Picard is implemented
3280 using the HTSJDK Java library to support accessing file formats that are
3281 commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
3282 VCF.")
3283 (license license:expat)))
3284
;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  (package (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/picard.git")
                    (commit version)))
              (file-name (string-append "java-picard-" version "-checkout"))
              (sha256
               (base32
                "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete pre-built binaries.
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       #:jdk ,icedtea-8
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
                (string-append (assoc-ref inputs "ant")
                               "/lib/ant.jar")))
             #t))
         ;; Make "test" depend on "compile-tests" directly and stop
         ;; "compile" from building the tests.
         (add-after 'unpack 'make-test-target-independent
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Default the deflater library path to the copy installed by the
         ;; 'install-jni-lib phase below instead of null.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (substitute* "src/java/net/sf/samtools/Defaults.java"
               (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                               (assoc-ref outputs "out")
                               "/lib/jni/libIntelDeflater.so"
                               "\")")))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one.  This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "lib/jni")
             (mkdir-p "jdk-src")
             (and (zero? (system* "tar" "--strip-components=1" "-C" "jdk-src"
                                  "-xf" (assoc-ref inputs "jdk-src")))
                  (zero? (system* "javah" "-jni"
                                  "-classpath" "classes"
                                  "-d" "lib/"
                                  "net.sf.samtools.util.zip.IntelDeflater"))
                  (with-directory-excursion "src/c/inteldeflater"
                    ;; Both steps must succeed.  Without the 'and' the exit
                    ;; status of the compile step would be discarded, since
                    ;; only the last form's value is returned.
                    (and (zero? (system* "gcc" "-I../../../lib" "-I."
                                         (string-append "-I" (assoc-ref inputs "jdk")
                                                        "/include/linux")
                                         "-I../../../jdk-src/src/share/native/common/"
                                         "-I../../../jdk-src/src/solaris/native/common/"
                                         "-c" "-O3" "-fPIC" "IntelDeflater.c"))
                         (zero? (system* "gcc" "-shared"
                                         "-o" "../../../lib/jni/libIntelDeflater.so"
                                         "IntelDeflater.o" "-lz" "-lstdc++")))))))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (zero? (apply system* `("ant" "all" ,@make-flags)))))
         ;; JAVA6_HOME is set to the same value as JAVA_HOME.
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni (string-append (assoc-ref outputs "out")
                                       "/lib/jni")))
               (mkdir-p jni)
               (install-file "lib/jni/libIntelDeflater.so" jni)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant) ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
3400
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Make build.xml refer to the jars of our inputs rather than to
         ;; the jar file names it mentions by default.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (string-append (assoc-ref inputs "java-jbzip2")
                               "/share/java/jbzip2.jar"))
               (("sam-1.103.jar")
                (string-append (assoc-ref inputs "java-picard-1.113")
                               "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (string-append (assoc-ref inputs "java-cisd-jhdf5")
                               "/share/java/sis-jhdf5.jar")))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (bin   (string-append out "/bin"))
                    ;; No trailing slash here: "/fastqc" is appended below,
                    ;; and a trailing slash would yield "share/fastqc//fastqc"
                    ;; as the symlink target.
                    (share (string-append out "/share/fastqc"))
                    (exe   (string-append share "/fastqc")))
               (for-each mkdir-p (list bin share))
               (copy-recursively "bin" share)
               ;; Have the wrapper script run the java binary of the "java"
               ;; input by absolute file name.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl) ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines.  It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
  be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
  without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
3479
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list (string-append "BINDIR=" (assoc-ref %outputs "out") "/bin"))
       #:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the directory passed as BINDIR before the install phase
         ;; runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files.  This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
3515
;; HTSlib release tarball, built with the GNU build system and its
;; default phases.
(define-public htslib
  (package
    (name "htslib")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "18bw0mn9pj5wgarnlaxmf1bb8pdqgl1zd6czirqcr62ajpn1xvy0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openssl" ,openssl)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; Two licenses apply: the modified BSD license covers the files under
    ;; cram/, the Expat license covers everything else.
    (license (list license:expat license:bsd-3))))
3544
;; Older HTSlib release, not exported.  Remove this variable as soon as no
;; package relies upon it any more.
(define htslib-1.3
  (let ((old-version "1.3.1"))
    (package
      (inherit htslib)
      (version old-version)
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/samtools/htslib/releases/download/"
               old-version "/htslib-" old-version ".tar.bz2"))
         (sha256
          (base32
           "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9")))))))
3558
;; idr: Python package built from the GitHub archive of the tagged release.
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1rjdly6daslw66r43g9md8znizlscn1sphycqyldzsidkc4vxqv3"))
       ;; This C file is generated code; delete it from the source.
       (snippet
        '(begin
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    (arguments
     ;; The single test ("test_inv_cdf.py") exercises features that are no
     ;; longer part of this package and asserts False, so the test suite
     ;; always fails.  Do not run it.
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
3594
;; Jellyfish k-mer counter.  The Ruby and Python bindings are installed
;; into their own outputs.
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; One "--enable-LANG-binding=DIR" flag per binding, where DIR is
       ;; the output of the same name.
       (map (lambda (lang)
              (string-append "--enable-" lang "-binding="
                             (assoc-ref %outputs lang)))
            '("ruby" "python"))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; The tests are run through generator_manager.hpp, which
             ;; uses either /bin/sh or $SHELL.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; As a whole the work is under the GPLv3 or later; some individual
    ;; files, e.g. lib/jsoncpp.cpp, carry the Expat license.
    (license (list license:gpl3+ license:expat))))
3648
;; khmer: Python package.  Bundled zlib, bzip2 and seqan are removed after
;; unpacking, and the test suite is run after installation instead of in
;; the regular 'check phase.
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (sha256
        (base32 "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
       (patches (search-patches "khmer-use-libraries.patch"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Delete bundled libraries.
             (for-each delete-file-recursively
                       '("third-party/zlib" "third-party/bzip2"))
             ;; Replace bundled seqan with the headers of the "seqan" input.
             (let ((bundled-seqan "third-party/seqan"))
               (delete-file-recursively bundled-seqan)
               (copy-recursively
                (string-append (assoc-ref inputs "seqan") "/include/seqan")
                (string-append bundled-seqan "/core/include/seqan")))
             ;; The bundled MurmurHash is kept: its canonical repository,
             ;; 'SMHasher', is unsuitable for providing a library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; It is simpler to test after installation.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; First three characters of the last five characters
                    ;; of the "python" input's file name.
                    (python-series (string-take
                                    (string-take-right
                                     (assoc-ref inputs "python") 5)
                                    3))
                    (site-packages (string-append out "/lib/python"
                                                  python-series
                                                  "/site-packages")))
               ;; Expose the installed executables and modules to the tests.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH") ":"
                                      site-packages))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)
       ;; Compiling with gcc-5 makes the tests fail.  Stay on gcc-4.9 at
       ;; least until the next khmer release (likely 2.1).
       ("gcc" ,gcc-4.9)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; Building on i686, armhf and mips64el fails with:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
3732
;; Kaiju: built by running make inside "src"; the custom 'install phase
;; copies the top-level "bin" and "util" directories into the output.
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bioinformatics-centre/kaiju/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1kdn4rxs0kr9ibmrgrfcci71aa6j6gr71dbc8pff7731rpab6kj7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               ;; Leave "src" again; "bin" and "util" are siblings of it.
               (chdir "..")
               (for-each (lambda (directory)
                           (copy-recursively directory bin))
                         '("bin" "util")))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
3771
;; MACS, fetched from PyPI under the project name "MACS2" and built with
;; Python 2.
(define-public macs
  (package
    (name "macs")
    (version "2.1.1.20160309")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32 "09ixspd1vcqmz1c81ih70xs4m7qml2iy5vyx1y74zww3iy1vl210"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no test target
       #:python ,python-2))             ; only compatible with Python 2.7
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
3798
;; MAFFT, built from the "without-extensions" source tarball.  The build
;; runs inside the "core" directory; the Makefile is patched so that
;; mafft-profile, mafft-distance and mafft-homologs.rb are not distributed,
;; and every installed program is wrapped so that coreutils is on PATH.
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name
               ;; "f2cl", the same program the replacement keeps; the
               ;; previous spelling "2cl" could not match such a line, which
               ;; left mafft-profile in PROGS.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               ;; NOTE(review): the leading ")" in this replacement looks
               ;; deliberate -- presumably it closes a shell group opened
               ;; earlier in the same recipe; confirm against the Makefile
               ;; before changing it.
               (("^rm -f mafft-distance mafft-distance.exe") ")#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace bare tool names with the absolute file names found
             ;; on PATH at build time.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Prefix PATH with coreutils' bin directory for every file
             ;; installed under bin.  The phase's own INPUTS argument is
             ;; used rather than the global %build-inputs, like the other
             ;; phases do.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
3884
;; Mash.  The bundled kseq.h is deleted from the source and the includes
;; are redirected to the copy shipped with htslib.
(define-public mash
  (package
    (name "mash")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marbl/mash/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "00fx14vpmgsijwxd1xql3if934l82v8ckqgjjyyhnr36qb9qrskv"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled copy of kseq.
           ;; TODO: Also delete bundled murmurhash and open bloom filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:configure-flags
       (let ((capnproto (assoc-ref %build-inputs "capnproto"))
             (gsl (assoc-ref %build-inputs "gsl")))
         (list (string-append "--with-capnp=" capnproto)
               (string-append "--with-gsl=" gsl)))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; Point the sources at htslib's kseq.h, since the bundled header
         ;; was deleted by the source snippet.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* (map (lambda (file)
                                 (string-append "src/mash/" file))
                               '("Sketch.cpp"
                                 "CommandFind.cpp"
                                 "CommandScreen.cpp"))
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'autoconf
           (lambda _
             (zero? (system* "autoconf")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib end up statically embedded in the final
       ;; application, which is why their licenses are listed below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3944
;; MetaBAT, built with SCons (Python 2).  The sources and the SConstruct
;; file are patched to use the samtools and htslib inputs.
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1hmvdalz3zj5sqqklg0l4npjdv37cv2hsdi1al9iby2ndxjs1b73"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (let ((out (assoc-ref %outputs "out"))
             (boost (assoc-ref %build-inputs "boost")))
         (list (string-append "PREFIX=" out)
               (string-append "BOOST_ROOT=" boost)))
       #:tests? #f ;; Tests are run during the build phase.
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the "bam/..." includes to the header locations of the
         ;; samtools and htslib inputs.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
4016
;; MinCED.  There is no install target, so the 'install phase copies
;; minced.jar into bin and writes a small shell wrapper that runs it with
;; the java executable of the "jre" input.
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/ctSkennerton/minced/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (wrapper (string-append bin "/minced")))
               ;; Minced comes with a wrapper script that tries to figure out where
               ;; it is located before running the JAR.  Since these paths are known
               ;; to us, we build our own wrapper to avoid coreutils dependency.
               (install-file "minced.jar" bin)
               (with-output-to-file wrapper
                 (lambda _
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin "/minced.jar \"$@\"\n"))))
               ;; Make the wrapper read-only and executable for everyone.
               (chmod wrapper #o555)
               ;; Return #t explicitly, like the other phases, instead of
               ;; the unspecified value of 'chmod'.
               #t))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
4072
;; MISO, fetched from PyPI as "misopy" and built with Python 2.  A source
;; snippet patches setup.py to use setuptools and to compile with gcc.
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32 "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Without setuptools the executables do not get installed.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no "test" target
       #:python ,python-2))             ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)))  ;for tests
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
4120
;; MUSCLE.  The source tarball is fetched with url-fetch/tarbomb; the
;; 'check phase only verifies that the built binary runs, and the 'install
;; phase copies the single "muscle" executable into bin.
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source (origin
              (method url-fetch/tarbomb)
              (uri (string-append
                    "http://www.drive5.com/muscle/muscle_src_"
                    version ".tar.gz"))
              (sha256
               (base32
                "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so just test if it runs.
           (lambda _ (zero? (system* "./muscle" "-version"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "muscle" bin)
               ;; Return #t explicitly, like the other phases, instead of
               ;; the unspecified value of 'install-file'.
               #t))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
4154
;; newick-utils, packaged from a git commit because there are no recent
;; releases.  The version string embeds the first 8 characters of the
;; commit hash.
(define-public newick-utils
  (let* ((commit "da121155a977197cab9fbb15953ca1b40b11eb87")
         (short-commit (string-take commit 8)))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Run autoreconf on the unpacked checkout; the phase succeeds
           ;; when autoreconf exits with status zero.
           (add-after 'unpack 'autoconf
             (lambda _
               (zero? (system* "autoreconf" "-vif")))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4193
;; OrfM release tarball, built with the GNU build system and its default
;; phases.
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4218
;; pplacer, built with OCaml 4.01.  The bundled cddlib sources are replaced
;; with those of the cddlib package, calls to 'git' are removed from the
;; build files, and the contents of "bin" are copied into the output.
(define-public pplacer
  (let ((commit "g807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/matsen/pplacer/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0z1lnd2s8sh6kpzg106wzbh2szw7h0hvq8syd5a6wv4rmyyz6x0f"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.01
         #:findlib ,ocaml4.01-findlib
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
                      (local-dir "cddlib_guix"))
                 (mkdir local-dir)
                 ;; Unpack the cddlib tarball into LOCAL-DIR.  Fail right
                 ;; here when tar reports an error, rather than letting the
                 ;; directory lookup below fail on a missing entry.
                 (with-directory-excursion local-dir
                   (unless (zero? (system* "tar" "xvf" cddlib-src))
                     (error "failed to unpack cddlib sources" cddlib-src)))
                 ;; 'scandir' lists "." and ".." first, so index 2 is the
                 ;; first real entry of LOCAL-DIR.
                 (let ((cddlib-src-folder
                        (string-append local-dir "/"
                                       (list-ref (scandir local-dir) 2)
                                       "/lib-src")))
                   ;; Copy every matching file over the bundled copies in
                   ;; "cdd_src".
                   (for-each
                    (lambda (file)
                      (copy-file file
                                 (string-append "cdd_src/" (basename file))))
                    (find-files cddlib-src-folder ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (copy-recursively "bin" bin))
               #t)))))
      (native-inputs
       `(("zlib" ,zlib)
         ("gsl" ,gsl)
         ("ocaml-ounit" ,ocaml4.01-ounit)
         ("ocaml-batteries" ,ocaml4.01-batteries)
         ("ocaml-camlzip" ,ocaml4.01-camlzip)
         ("ocaml-csv" ,ocaml4.01-csv)
         ("ocaml-sqlite3" ,ocaml4.01-sqlite3)
         ("ocaml-xmlm" ,ocaml4.01-xmlm)
         ("ocaml-mcl" ,ocaml4.01-mcl)
         ("ocaml-gsl" ,ocaml4.01-gsl)
         ("cddlib-src" ,(package-source cddlib))))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "http://matsen.fhcrc.org/pplacer")
      (license license:gpl3))))
4301
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-scripts-dir
           (lambda _
             (chdir "scripts")
             ;; Return #t explicitly, like the other phases, instead of
             ;; the unspecified value of 'chdir'.
             #t))
         (replace 'check
           (lambda _
             (zero? (system* "python" "-m" "unittest" "discover" "-v"))))
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (hmmer-bin (string-append
                                (assoc-ref inputs "hmmer") "/bin"))
                    (infernal-bin (string-append
                                   (assoc-ref inputs "infernal") "/bin")))
               ;; refpkg_align.py gets both hmmer and infernal on PATH.
               (let ((path (string-append hmmer-bin ":" infernal-bin)))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               ;; hrefpkg_query.py only gets hmmer.
               (wrap-program (string-append bin "/hrefpkg_query.py")
                 `("PATH" ":" prefix (,hmmer-bin))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
4341
;; pbcore, fetched from PyPI and built with Python 2.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     ;; pbcore requires Python 2.7
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
4369
;; WarpedLMM, fetched as a zip archive from PyPI (hence the unzip native
;; input) and built with Python 2.
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     ;; requires Python 2.7
     `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
4404
(define-public pbtranscript-tofu
  ;; The package is pinned to a specific commit of the cDNA_primer
  ;; repository; the first seven characters of the commit hash are appended
  ;; to the version string.
  (let ((commit-id "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit-id 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit-id)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         ;; Drop the Cython tarball bundled with the sources.
         (snippet
          '(begin
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; The Python package lives in a sub-directory of the checkout.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; The setup.py hack guarded by this condition causes a build
           ;; error with setuptools 18.0 and later, so turn the condition
           ;; into a constant false.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
4461
;; PRANK multiple sequence aligner.  There is no configure script and no
;; usable 'install' target, so the binary and man page are installed by hand
;; and the binary is wrapped so it can find its helper programs.
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://wasabiapp.org/download/prank/prank.source."
                    version ".tgz"))
              (sha256
               (base32
                "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Both phases below are added right after 'unpack.  The one added
         ;; last ('remove-m64-flag) therefore runs first, while the working
         ;; directory is still the top of the source tree; that is why it
         ;; refers to "src/Makefile".
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           ;; Take the inputs from the phase's INPUTS keyword argument, like
           ;; the other phases in this file, rather than from the global
           ;; %build-inputs variable.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (bin  (string-append out "/bin"))
                    (man  (string-append out "/share/man/man1"))
                    ;; Directories holding the external programs that are
                    ;; prepended to PATH in the wrapper.
                    (path (string-append
                           (assoc-ref inputs "mafft") "/bin:"
                           (assoc-ref inputs "exonerate") "/bin:"
                           (assoc-ref inputs "bppsuite") "/bin")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
4519
;; Proteinortho orthology detection tool.  The upstream Makefile is patched
;; to install into the output's bin directory, and the main Perl script is
;; wrapped with the PATH that was in effect during the build.
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "5.16b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
             version "_src.tar.gz"))
       (sha256
        (base32 "1wl0dawpssqwfjvr651r4wlww8hhjin8nba6xh71ks7sbypx886j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; No configure script exists; point INSTALLDIR in the Makefile at
         ;; the output's bin directory instead.
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bin-dir "\n"))))
             #t))
         ;; 'make install' does not create the install directory itself.
         (add-before 'install 'make-install-directory
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin")))
               (mkdir-p bin-dir))
             #t))
         ;; Prefix PATH in the wrapper with the build-time value of PATH.
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl"))
                   (build-path (getenv "PATH")))
               (wrap-program script
                 `("PATH" ":" prefix (,build-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
4571
;; Pyicoteo suite for analysing high-throughput sequencing data.  Built with
;; Python 2; the source archive is a Bitbucket snapshot, so an explicit file
;; name is given to the download.
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The enumeration below lists seven tools, so the count in the
    ;; introductory sentence says "seven".
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
4609
;; Prodigal gene predictor.  The build is Makefile-only: the 'configure'
;; phase is removed and the install location is passed to make through
;; INSTALLDIR.
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/hyattpd/Prodigal/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
4640
;; Roary pan genome pipeline.  The CPAN tarball is not built through the
;; usual Perl build steps: the 'configure' and 'build' phases are removed,
;; the test files are run one by one, and the scripts and Perl modules are
;; copied into the output and wrapped with the environment they need.
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly, with the in-tree scripts and modules on the
             ;; search paths.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Run every test file (no short-circuit) and succeed only
             ;; when the list of failing files is empty.
             (null? (filter (lambda (file)
                              (display file)(display "\n")
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every script whose name does not end in "R".  The
               ;; names come from the source tree's "bin" directory (e.g.
               ;; "bin/roary"), so prefixing OUT yields the installed file.
               ;; Both variables are set in a single 'wrap-program' call.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets its own wrapper with the R
               ;; library path and a PATH made of coreutils and OUT/bin.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
4756
;; RAxML phylogenetic inference tool, built from the HYBRID Makefile and
;; installed by hand together with a shorter "raxml" alias.
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.10")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use the plain HYBRID Makefile rather than the SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Provide "raxml" as an alias next to the real binary.  The
               ;; second argument of 'symlink' is the name of the link to
               ;; create, so it must point into BIN; a bare "raxml" would
               ;; create the link in the build directory instead.  The
               ;; target is relative, so the link resolves within BIN.
               (symlink executable (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
4798
;; RSEM expression estimation package.  The bundled boost copy is removed
;; from the source, the Makefile is adjusted instead of running a configure
;; script, and programs are installed and wrapped by hand.
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                           version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       ;; Delete the boost sources shipped in the tarball.
       (snippet
        '(begin
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  Use this phase to stop the
         ;; "all" target from depending on the bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         ;; Copy every file matching "rsem-.*" into bin and the Perl helper
         ;; module into the site_perl directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin-dir (string-append out "/bin/"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin-dir)
               (mkdir-p perl-dir)
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perl-dir))
             #t))
         ;; Wrap the listed programs so PERL5LIB includes the directory
         ;; holding the installed Perl module.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (program)
                           (wrap-program (string-append out "/bin/" program)
                             `("PERL5LIB" ":" prefix (,perl-dir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
4874
;; RSeQC RNA-seq quality control package, built with Python 2.  The source
;; snippet removes the bundled pysam and adjusts setup.py accordingly.
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Drop the dependency on the outdated "distribute" module.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Make setup.py treat pysam as already available so the
             ;; bundled copy is not used.
             (("^have_pysam = False") "have_pysam = True"))
           ;; Remove the bundled copy of pysam itself.
           (delete-file-recursively "lib/pysam")
           #t))))
    (build-system python-build-system)
    (arguments
     (list #:python python-2))
    (inputs
     (list (list "python-cython" python2-cython)
           (list "python-pysam" python2-pysam)
           (list "python-numpy" python2-numpy)
           (list "zlib" zlib)))
    (native-inputs
     (list (list "python-nose" python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
4917
(define-public seek
  ;; There are no release tarballs.  According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source (origin
                (method hg-fetch)
                (uri (hg-reference
                      (url "https://bitbucket.org/libsleipnir/sleipnir")
                      (changeset changeset)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         ;; Sub-directories of "tools/" that are built and installed in
         ;; addition to what the top-level build produces.
         (let ((dirs '("SeekMiner"
                       "SeekEvaluator"
                       "SeekPrep"
                       "Distancer"
                       "Data2DB"
                       "PCL2Bin")))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 (invoke "bash" "gen_auto")))
             ;; All three phases use 'invoke', which raises an error when
             ;; the command fails, so a failing 'make' aborts the build at
             ;; the tool that caused it.
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               (apply invoke "make" make-flags)))
                           dirs)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               (apply invoke "make"
                                      (append make-flags '("install")))))
                           dirs)
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
4987
;; Samtools 1.x, configured to use the system htslib.  Besides the regular
;; installation, the static libbam.a library and the top-level header files
;; are installed as well.
(define-public samtools
  (package
    (name "samtools")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "05myg7bs90i68qbqab9cdg9rqj2xh39azibrx82ipzc5kcfvqhn9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "prefix=" out)))
       #:configure-flags (list "--with-ncurses" "--with-htslib=system")
       #:phases
       (modify-phases %standard-phases
         ;; test/test.pl hard-codes /bin/bash; substitute the bash found
         ;; on PATH.
         (add-after 'unpack 'patch-tests
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (lib-dir (string-append out "/lib")))
               (install-file "libbam.a" lib-dir)
               #t)))
         ;; Install every "*.h" file found directly in the build directory
         ;; under include/samtools/.
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (header-dir (string-append out "/include/samtools/"))
                    (headers (scandir "."
                                      (lambda (entry)
                                        (string-match "\\.h$" entry)))))
               (for-each (lambda (header)
                           (install-file header header-dir))
                         headers)
               #t))))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5045
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools.  The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    ;; Start from the arguments of 'samtools', disable the tests (there is
    ;; no "check" target), add a make flag, and adjust the phases.
    (arguments
     (cons* #:tests? #f
            (substitute-keyword-arguments (package-arguments samtools)
              ((#:make-flags inherited-flags)
               `(cons "LIBCURSES=-lncurses" ,inherited-flags))
              ((#:phases inherited-phases)
               `(modify-phases ,inherited-phases
                  (delete 'configure)
                  (delete 'patch-tests)
                  ;; Only the "samtools" binary is installed, by hand.
                  (replace 'install
                    (lambda* (#:key outputs #:allow-other-keys)
                      (let* ((out (assoc-ref outputs "out"))
                             (bin-dir (string-append out "/bin")))
                        (mkdir-p bin-dir)
                        (install-file "samtools" bin-dir)
                        #t))))))))))
5076
(define-public mosaik
  ;; Upstream provides neither release tarballs nor tags, so the source is
  ;; pinned to this commit.
  (let ((commit-id "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit commit-id)))
         (file-name (string-append name "-" version))
         (sha256
          (base32 "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, enter the "src" directory, where the
           ;; following phases run.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy the contents of "../bin" (relative to "src") into the
           ;; output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin-dir (string-append out "/bin")))
                 (mkdir-p bin-dir)
                 (copy-recursively "../bin" bin-dir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5123
;; NGS SDK from the NCBI "ngs" repository.  The build happens inside the
;; "ngs-sdk" sub-directory and uses upstream's own Perl-based configure
;; script.  The 'enter-dir and 'configure phase names are part of this
;; package's interface: 'java-ngs' inherits these arguments and replaces
;; 'enter-dir.
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.3.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so it is called directly with
               ;; only the options it understands.  'invoke' raises an
               ;; error if the script exits with a non-zero status.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)))))
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t)))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
5171
;; Java bindings for the NGS SDK.  Everything is inherited from 'ngs-sdk'
;; except that the build enters the "ngs-java" directory and has the JDK and
;; 'ngs-sdk' itself as inputs.
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Put a #:modules entry in front of the arguments of 'ngs-sdk', then
     ;; rewrite the inherited #:phases so that 'enter-dir changes into
     ;; "ngs-java".
     (substitute-keyword-arguments
         (append '(#:modules ((guix build gnu-build-system)
                              (guix build utils)
                              (srfi srfi-1)
                              (srfi srfi-26)))
                 (package-arguments ngs-sdk))
       ((#:phases inherited-phases)
        `(modify-phases ,inherited-phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
5189
;; NCBI-VDB database engine.  Upstream's Perl-based configure script is
;; patched and called by hand; after installation the interface libraries,
;; interface headers and configuration files are copied into the output.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Add -msse2 to the CFLAGS assignment.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so it is called directly.
               ;; 'invoke' raises an error if it exits with a non-zero
               ;; status.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install interface libraries.  On i686 the interface libraries
             ;; are installed to "linux/gcc/i386", so we need to use the Linux
             ;; architecture name ("i386") instead of the target system prefix
             ;; ("i686").
             (let* ((out (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               (mkdir ilib)
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include")))
             #t))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir target)
               (install-file "libs/kfg/default.kfg" target)
               (install-file "libs/kfg/certs.kfg" target))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5298
;; PLINK 1.07.  Upstream ships a plain Makefile (no 'configure' script and no
;; "check" target), so the build is driven through make flags and the binary
;; is installed by hand.
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; An empty value disables phoning home.
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script to run.
         (delete 'configure)
         ;; Copy the freshly built executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    ;; The source is distributed as a zip archive.
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5351
;; plink-ng inherits every field it does not override from 'plink'.  The
;; sources for this release line live in the "1.9" sub-directory and are built
;; with "Makefile.std".
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/chrchang/plink-ng/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "09ixrds009aczjswxr2alcb774mksq5g0v78dgjjn1h4dky0kf9a"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags (list "BLASFLAGS=-llapack -lopenblas"
                          "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
                          "ZLIB=-lz"
                          ;; Select the makefile to use.
                          "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         ;; Enter the directory holding the 1.9 sources.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         ;; There is no "configure" script to run.
         (delete 'configure)
         ;; Copy the executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
5388
;; Helper library shared by Smith lab tools.  It is built from a pinned git
;; commit; the version string is derived from the revision counter and the
;; first seven characters of the commit hash.
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))     ;for 'cut' in the install phase
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           ;; Rewrite references to "sam.h" so that they point at the
           ;; "samtools/" include sub-directory.
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 ;; The dot is escaped so that only a literal "sam.h" is
                 ;; rewritten, not "sam" followed by an arbitrary character.
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           ;; Install the object files under lib and the C++ headers under
           ;; include/smithlab-cpp.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (lib     (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           ;; There is no "configure" script to run.
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
5441
;; preseq is built against the packaged samtools and smithlab-cpp; the
;; bundled samtools copy is removed from the source by the snippet.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/"
                                  "preseq/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled samtools.
                  (delete-file-recursively "samtools")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script to run.
         (delete 'configure))
       #:make-flags
       ;; Look up the store locations once, then assemble the variables
       ;; handed to make.
       (let ((out      (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
5491
;; screed, fetched from PyPI.  The test suite is run with nose, skipping
;; tests tagged "known_failing".
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Make the current directory importable for the test run.
             (setenv "PYTHONPATH"
                     (string-append (getenv "PYTHONPATH") ":."))
             ;; 'invoke' raises an error that names the failing command when
             ;; nosetests exits with a non-zero status, and returns #t
             ;; otherwise.
             (invoke "nosetests" "--attr" "!known_failing"))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
5522
;; Python 2 variant of python-screed, derived with 'package-with-python2'.
(define-public python2-screed
  (package-with-python2
   python-screed))
5525
;; SRA Toolkit.  It is built against an installed ncbi-vdb, which requires
;; patching the configure machinery that otherwise expects ncbi-vdb's source
;; and raw build directories.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.8.2-1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       ;; Point the build at the configuration files and library directory
       ;; of the ncbi-vdb input.
       (list (string-append "DEFAULT_CRT="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/certs.kfg")
             (string-append "DEFAULT_KFG="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/default.kfg")
             (string-append "VDB_LIBDIR="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            ;; The library sub-directory is chosen from the
                            ;; system the package is built for.
                            ,(if (string-prefix? "x86_64"
                                                 (or (%current-target-system)
                                                     (%current-system)))
                                 "/lib64"
                                 "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources and
             ;; raw build output of ncbi-vdb, including files that are not
             ;; installed.  Since we are building against an installed version of
             ;; ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               ;; NOTE(review): this pattern is anchored at the start of the
               ;; line, so the whitespace after "^" must match the indentation
               ;; used in konfigure.perl exactly -- confirm against upstream.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install', so it is run by hand.  'invoke'
             ;; raises an error naming the command if it exits with a
             ;; non-zero status and returns #t otherwise.
             (invoke "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     ;; (string-append "--with-xml2-prefix="
                     ;;                (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5")))))
         ;; This version of sra-tools fails to build with glibc because of a
         ;; naming conflict.  glibc-2.25/include/bits/mathcalls.h already
         ;; contains a definition of "canonicalize", so we rename it.
         ;;
         ;; See upstream bug report:
         ;; https://github.com/ncbi/sra-tools/issues/67
         (add-after 'unpack 'patch-away-glibc-conflict
           (lambda _
             (substitute* "tools/bam-loader/bam.c"
               (("canonicalize\\(" line)
                (string-append "sra_tools_" line)))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
5636
;; SeqAn library release.  Nothing is compiled: the builder unpacks the
;; tarball and copies its "include" and "share" trees into separate outputs.
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball     (assoc-ref %build-inputs "source"))
                ;; Directories providing the "tar" and "bzip2" executables.
                (search-path (string-append
                              (assoc-ref %build-inputs "tar") "/bin:"
                              (assoc-ref %build-inputs "bzip2") "/bin"))
                (include-dir (string-append (assoc-ref %outputs "out")
                                            "/include"))
                (share-dir   (string-append (assoc-ref %outputs "doc")
                                            "/share")))
           (setenv "PATH" search-path)
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", everything under "share" goes to "doc".
           (copy-recursively "include" include-dir)
           (copy-recursively "share" share-dir)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
5680
;; seqmagick, fetched from PyPI and built with the Python build system using
;; its default phases.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "seqmagick" version))
              (sha256
               (base32
                "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (inputs
     `(("python-biopython" ,python-biopython)))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
5706
;; seqtk.  There is no 'configure' script and no test suite; a minimal
;; sanity check stands in for the "check" phase and the binary is installed
;; by hand.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/seqtk/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove extraneous header files, as is done in the seqtk
                  ;; master branch.
                  (for-each delete-file
                            (list "ksort.h" "kstring.h" "kvec.h"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script to run.
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.  'invoke'
           ;; raises an error naming the command on a non-zero exit status
           ;; and returns #t otherwise.
           (lambda _
             (invoke "./seqtk" "seq")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               ;; Phases are expected to return #t on success.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
5749
;; SNAP short-read aligner.  There is no 'configure' script; the test suite
;; is the "unit_tests" executable produced by the build, and the two
;; executables are installed by hand.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script to run.
         (delete 'configure)
         ;; 'invoke' raises an error naming the command when the unit tests
         ;; exit with a non-zero status and returns #t otherwise.
         (replace 'check
           (lambda _
             (invoke "./unit_tests")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    (native-inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
5790
;; SortMeRNA.  The executables go to the "out" output and the rRNA FASTA
;; databases shipped with the source go to the separate "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (dbdir  (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases"))
                    ;; Build a one-argument installer for DIRECTORY.
                    (installer (lambda (directory)
                                 (lambda (file)
                                   (install-file file directory)))))
               ;; Executables first, then every database file found below
               ;; "rRNA_databases".
               (for-each (installer bindir)
                         '("sortmerna" "indexdb_rna"))
               (for-each (installer dbdir)
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
5837
;; STAR RNA-seq aligner.  Pre-built binaries and the bundled htslib are
;; removed from the source; the build happens in the "source" sub-directory
;; against the packaged htslib.
(define-public star
  (package
    (name "star")
    (version "2.6.0c")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/alexdobin/STAR.git")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "04cj6jw8d9q6lk9c78wa4fky6jdlicf1d13plq7182h8vqiz8p59"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Use "rm" from PATH rather than an absolute file name.
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         ;; Replace the value assigned to COMPILATION_TIME_PLACE in the
         ;; Makefile with a fixed string.
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             ;; Phases are expected to return #t on success.
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the Depend.list target.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of bundled htslib headers into
             ;; angle-bracket includes without the "htslib/" prefix.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; Same rewrite for the doubly-prefixed form; the dot before
             ;; "h" is escaped so that it only matches a literal dot.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         ;; There is no "configure" script to run.
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
5917
;; Subread.  The build runs "Makefile.Linux" from the "src" directory and
;; the contents of the resulting "bin" directory are copied to the output.
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               ;; Only the list of flags depends on the system; the
               ;; "CCFLAGS=" prefix and the joining are shared.
               (string-append "CCFLAGS="
                              (string-join
                               (if (string-prefix? "x86_64" system)
                                   (append flags flags64)
                                   flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin)
               ;; Phases are expected to return #t on success.
               #t)))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
5971
;; StringTie.  The bundled samtools copy is removed from the source and the
;; build is redirected to the packaged samtools headers.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                  "stringtie-" version ".tar.gz"))
              (sha256
               (base32
                "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove the bundled samtools sources.
                  (delete-file-recursively "samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Drop the libbam.a prerequisite of the "stringtie" target.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the "samtools/" include
             ;; sub-directory.  The dot before "h" is escaped so that it
             ;; only matches a literal dot.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring)\\.h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "stringtie" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
6033
;; Taxtastic, built with Python 2.  The tests are run through unittest
;; discovery.
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "taxtastic" version))
              (sha256
               (base32
                "03pysw79lsrvz4lwzis88j15067ffqbi4cid5pqhrlxmd6bh8rrk"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; 'invoke' raises an error naming the command when the test
             ;; run exits with a non-zero status and returns #t otherwise.
             (invoke "python" "-m" "unittest" "discover" "-v"))))))
    (propagated-inputs
     `(("python-sqlalchemy" ,python2-sqlalchemy)
       ("python-decorator" ,python2-decorator)
       ("python-biopython" ,python2-biopython)
       ("python-pandas" ,python2-pandas)
       ("python-psycopg2" ,python2-psycopg2)
       ("python-fastalite" ,python2-fastalite)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-six" ,python2-six)
       ("python-jinja2" ,python2-jinja2)
       ("python-dendropy" ,python2-dendropy)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
6070
;; VCFtools, built from the upstream release tarball with the standard GNU
;; phases; only the make variables are overridden.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.15")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/vcftools/vcftools/releases/download/v"
                    version "/vcftools-" version ".tar.gz"))
              (sha256
               (base32
                "1qw30c45wihgy632rbz4rh3njnwj4msj46l1rsgdhyg6bgypmr1i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       ;; Both PREFIX and MANDIR are rooted at the "out" output.
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
6106
;; Infernal, built with the standard GNU phases; Perl is only needed at
;; build time for the tests.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/infernal-"
                           version ".tar.gz"))
       (sha256
        (base32
         "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
6134
;; CENTIPEDE: unsupervised classification of candidate transcription factor
;; binding sites.  The tarball is fetched from R-Forge.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://download.r-forge.r-project.org/src/contrib/CENTIPEDE_"
             version ".tar.gz"))
       (sha256
        (base32
         "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
6158
;; vegan: descriptive community ecology tools, fetched from CRAN.
(define-public r-vegan
  (package
    (name "r-vegan")
    (version "2.5-2")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "vegan" version))
              (sha256
               (base32
                "13pyqvlpz64qibi8k5d109v7q09j06mbv6ndix3n4cn21mjx391c"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-cluster" ,r-cluster)
       ;; knitr is needed for the vignettes.
       ("r-knitr" ,r-knitr)
       ("r-lattice" ,r-lattice)
       ("r-mass" ,r-mass)
       ("r-mgcv" ,r-mgcv)
       ("r-permute" ,r-permute)))
    (home-page "https://cran.r-project.org/web/packages/vegan")
    (synopsis "Functions for community ecology")
    (description
     "The vegan package provides tools for descriptive community ecology. It
has most basic functions of diversity analysis, community ordination and
dissimilarity analysis. Most of its multivariate tools can be used for other
data types as well.")
    (license license:gpl2+)))
6188
;; annotate: R environments for microarray annotation, from Bioconductor.
(define-public r-annotate
  (package
    (name "r-annotate")
    (version "1.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "annotate" version))
              (sha256
               (base32
                "1qmncyvy147a1ll3iri45p822kcs3s7583jfnq9jf6sz9ilk8cjf"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-xtable" ,r-xtable)))
    (home-page "https://bioconductor.org/packages/annotate")
    (synopsis "Annotation for microarrays")
    (description "This package provides R environments for the annotation of
microarrays.")
    (license license:artistic2.0)))
6215
;; copynumber: segmentation of copy number data, from Bioconductor.
(define-public r-copynumber
  (package
    (name "r-copynumber")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "copynumber" version))
       (sha256
        (base32
         "0y9nnwb0psphp3ix88wj2f8z5gr45r5znf55w892ysm27isdpmms"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-genomicranges" ,r-genomicranges)
       ("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/copynumber")
    (synopsis "Segmentation of single- and multi-track copy number data")
    (description
     "This package segments single- and multi-track copy number data by a
penalized least squares regression method.")
    (license license:artistic2.0)))
6238
;; geneplotter: plotting functions for genomic data, from Bioconductor.
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "geneplotter" version))
              (sha256
               (base32
                "055g28xgiazl4l0gkg8xiamks64f5yckjjyvw1abd6d6qjavwx0g"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://bioconductor.org/packages/geneplotter")
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (license license:artistic2.0)))
6263
;; genefilter: basic gene filtering functions, from Bioconductor.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.62.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "14l0ff02spmjwxj0m1czhg5vlkgwcfi73cym8m2n9vn6i7bjdaqi"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-survival" ,r-survival)))
    (home-page "https://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
6290
;; DESeq2: differential expression testing on count data using the negative
;; binomial distribution.  The Guix name is lower-cased; the upstream name is
;; recorded in the package properties.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq2" version))
              (sha256
               (base32
                "1wjnfpb41a9mmf9a22bz4zh7r1d4id50vpdc1mn5vfzrz7li9qik"))))
    (properties '((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
6327
;; DEXSeq: inference of differential exon usage from RNA-seq exon counts.
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DEXSeq" version))
              (sha256
               (base32
                "1mqb3mdxcsi3largsl7k27bvqrgps9ixv806xvmf29pw0xn05sg1"))))
    (properties '((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allows the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (license license:gpl3+)))
6370
;; AnnotationForge: code for generating annotation packages and their
;; databases, intended for use with AnnotationDbi.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.22.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "17kmy7nvpyyj6w5jyrjciw87rydmmmc8q6cnwqjv1j7li9bp09gr"))))
    (properties '((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
6400
;; RBGL: R interface to the graph algorithms of the Boost library.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "0hj972mmqpyi5fx1rq33kysavdyz4nspi6gcffzi3rv339m0anhf"))))
    (properties '((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (home-page "https://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
6421
;; GSEABase: classes and methods supporting Gene Set Enrichment Analysis.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "11bv92svik399q677jv96b71i4bq68xxyxn1yijpdik2lq4hgl7a"))))
    (properties '((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
6448
;; Category: tools for performing category analysis, from Bioconductor.
(define-public r-category
  (package
    (name "r-category")
    (version "2.46.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "03wfqa8d1dgwsm327zl2mpkq7dq3mzhq12598qz3ylfhrwplbgx0"))))
    (properties '((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (home-page "https://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
6479
;; GOstats: tools for interacting with GO and microarray data.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.46.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "1i5mydz5d95w2k28qr9j01hmbnl2id55jq94jvcpcyp1pvinkdq0"))))
    (properties '((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (home-page "https://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
6510
;; ShortRead: sampling, iteration, input, filtering and trimming of FASTQ
;; files.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.38.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "038z3z7qaw5bpgjzy91sjkybsny6jwjjsrnnq4gdqdw9ss1qy1fb"))))
    (properties '((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
6552
;; systemPipeR: building and running end-to-end NGS analysis workflows.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "1550pd63mmky0vgkmpni7zf14kqz1741wv63nfaw29kcmhh3m5lm"))))
    (properties '((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
6598
;; groHMM: pipeline for the analysis of GRO-seq data.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "1kjb14apyly44qdlx2ld6gr69wlazd4mbhs58l35hir12aphgrzp"))))
    (properties '((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (license license:gpl3+)))
6625
;; TxDb annotation database for Homo sapiens, derived from the UCSC hg19
;; genome and the "knownGene" track.
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append
             "https://bioconductor.org/packages/release/data/"
             "annotation/src/contrib/TxDb.Hsapiens.UCSC.hg19.knownGene_"
             version ".tar.gz"))
       (sha256
        (base32
         "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     '((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (license license:artistic2.0)))
6657
;; SPARQL: client for posing SELECT or UPDATE queries to an end-point.
(define-public r-sparql
  (package
    (name "r-sparql")
    (version "1.16")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "SPARQL" version))
       (sha256
        (base32
         "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
    (properties '((upstream-name . "SPARQL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "https://cran.r-project.org/web/packages/SPARQL")
    (synopsis "SPARQL client for R")
    (description "This package provides an interface to use SPARQL to pose
SELECT or UPDATE queries to an end-point.")
    ;; The only license indication is found in the DESCRIPTION file,
    ;; which states GPL-3. So we cannot assume GPLv3+.
    (license license:gpl3)))
6680
;; VSEARCH: sequence search, clustering and related tools for metagenomics.
;; The bundled cityhash copy is removed and the packaged cityhash is used
;; instead.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/torognes/vsearch/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "15pbirgzhvflj4pi5n82vybbzjy9mlb0lv5l3qhrmdkfzpbyahw3"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources. The vsearch source is adjusted
           ;; for this in the patch.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run autoreconf right after unpacking; the phase succeeds only
         ;; when autoreconf exits with status zero.
         (add-after 'unpack 'autogen
           (lambda _
             (let ((status (system* "autoreconf" "-vif")))
               (zero? status)))))))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems (list "x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
6731
;; ParDRe: MPI-based parallel removal of duplicate DNA reads.
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    (version "1.1.5-1")
    (source (origin
              (method url-fetch)
              ;; The upstream version is spelled out literally because the
              ;; package version carries the extra "-1" suffix.
              (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                                  "1.1.5" ".tar.gz"))
              (sha256
               (base32
                "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install by copying the "ParDRe" executable into the output's
         ;; bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "ParDRe" bin)
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
6772
;; Bio::Kseq: Ruby bindings to the kseq.h FASTA/FASTQ parser.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments
     '(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
6800
;; Bio-locus: tabix-like tool for fast querying of genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
6824
;; bio-blastxmlparser: BLAST XML parser usable as a library and as a command
;; line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; rspec is declared as a native input, like in the other Ruby packages
    ;; of this file (bio-locus, ruby-bio-kseq), rather than as a regular
    ;; input.
    ;; NOTE(review): this assumes rspec is only needed at build/test time --
    ;; confirm against the gem's dependency list.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
6848
;; BioRuby: Ruby development tools and libraries for bioinformatics and
;; molecular biology.  Upstream publishes the gem under the name "bio".
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Before building, replace the references to "/bin/sh", "/bin/ls",
         ;; "which" and "/bin/echo" in the test files with the locations
         ;; reported by 'which' in the build environment.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; The clauses are applied in the order given, so this matches
             ;; the previous three separate passes over the same file.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; The list elements are separated by whitespace only; a comma
             ;; here would be read as an unquote.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
6891
;; ACSNMineR: gene set enrichment or depletion analysis based on ACSN maps or
;; user imported maps.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32
         "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    (properties '((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (license license:gpl2+)))
6917
;; BiocGenerics: S4 generic functions needed by many Bioconductor packages.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32
         "19qxhy2cd3pykkhzbb5q3crgaxf65cpzf2mkfsz16gqhi8flj72p"))))
    (properties '((upstream-name . "BiocGenerics")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/BiocGenerics")
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (license license:artistic2.0)))
6937
;; BiocInstaller: installs and updates R packages from Bioconductor, CRAN,
;; and Github.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32
         "1xg1gi1hf5vflp71ji21gnmr4kcjpx8a6c47cllpc7yqnjv5nfg0"))))
    (properties '((upstream-name . "BiocInstaller")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/BiocInstaller")
    (synopsis "Install Bioconductor packages")
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (license license:artistic2.0)))
6956
;; biocViews: creates HTML pages categorizing the packages of a Bioconductor
;; repository by keywords ("views").
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.48.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32
         "1rxvwikqivsgxjjcazlszy8xgz346lfh5rw4llxw6fz38fjgb0k5"))))
    (properties '((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (home-page "https://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
6983
;; bookdown: output formats and utilities for authoring books and technical
;; documents with R Markdown.
(define-public r-bookdown
  (package
    (name "r-bookdown")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "bookdown" version))
       (sha256
        (base32
         "1b3fw1f41zph5yw3kynb47aijq53vhaa6mnnvxly72zamyzdf95q"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-htmltools" ,r-htmltools)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-tinytex" ,r-tinytex)
       ("r-yaml" ,r-yaml)
       ("r-xfun" ,r-xfun)))
    (home-page "https://github.com/rstudio/bookdown")
    (synopsis "Authoring books and technical documents with R markdown")
    (description "This package provides output formats and utilities for
authoring books and technical documents with R Markdown.")
    (license license:gpl3)))
7007
;; BiocStyle from Bioconductor; `upstream-name' keeps the mixed-case
;; spelling used by the upstream tarball.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32 "17m901ylz00w1a3nq5f910v55zixm1nr6rb3qrsbhqd94qzr0l2p"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BiocStyle")))
    (propagated-inputs
     `(("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description
     "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
7032
;; BiocCheck from Bioconductor.  Besides the regular R build, an extra phase
;; exposes the package's scripts under the output's "bin" directory.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocCheck" version))
              (sha256
               (base32
                "1srp1g809f1nn0fyqknr7r6dq89bw1xpjvmygr7cw6ffknbc671s"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out        (assoc-ref outputs "out"))
                    (dest-dir   (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the file name is appended, so the symlink targets do
                    ;; not contain a doubled "//".
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script from the installed R library into bin/.
               (for-each
                (lambda (file)
                  (symlink (string-append script-dir "/" file)
                           (string-append dest-dir "/" file)))
                '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7078
;; optparse, fetched from CRAN; its only propagated input is r-getopt.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "optparse" version))
              (sha256
               (base32
                "1d7v5gl45x4amsfmzn5zyyffyqlc7a82h01szlnda22viyxids0h"))))
    (build-system r-build-system)
    (propagated-inputs `(("r-getopt" ,r-getopt)))
    (home-page "https://github.com/trevorld/optparse")
    (synopsis "Command line option parser")
    (description
     "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (license license:gpl2+)))
7101
;; DNAcopy from Bioconductor; `upstream-name' keeps the upstream
;; capitalization.
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DNAcopy" version))
              (sha256
               (base32
                "03hfhmmc5y60r2gcgm367w2fr7qj115l74m9bp3h9qpn5yci0d8n"))))
    (properties
     `((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    ;; The Fortran compiler is a build-time tool, so it is declared as a
    ;; native input rather than a regular input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
7123
;; S4Vectors from Bioconductor; depends only on r-biocgenerics.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.18.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "02bps2rpjqx2npwxq3x62ncwi9ggr165cwi56h6hld28bw2gddy8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "S4Vectors")))
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
7150
;; seqinr, fetched from CRAN; zlib is a plain input, the R dependencies are
;; propagated.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.4-5")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqinr" version))
              (sha256
               (base32
                "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (home-page "http://seqinr.r-forge.r-project.org/")
    (synopsis "Biological sequences retrieval and analysis")
    (description
     "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (license license:gpl2+)))
7175
;; IRanges from Bioconductor.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.14.11")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "0wz63hysspyjihqadg91dbvllc5a61zzjrsz0b9498lihqc6m1la"))))
    (build-system r-build-system)
    (properties '((upstream-name . "IRanges")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
7204
;; GenomeInfoDbData, a Bioconductor annotation data package.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives under "data/annotation/" rather than "bioc/",
       ;; so the URL is spelled out instead of using bioconductor-uri.
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32 "0hipipvyvrh75n68hsjg35sxbcfzrghzxv547vnkk2f8ya99g01r"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomeInfoDbData")))
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description
     "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
7227
;; GenomeInfoDb from Bioconductor; propagates its data package
;; r-genomeinfodbdata among other R dependencies.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "0yhnqhaydmmq7ihmhj3rbal4afq5p993l2qqrd0n5wmbyg7glg2d"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomeInfoDb")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
7255
;; edgeR from Bioconductor; `upstream-name' keeps the upstream
;; capitalization.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.22.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "0w3jv29n0kkaiig8dbbdqy2dkng8xfaihch82mj9ci5hphrx3nng"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod))) ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
7283
;; VariantAnnotation from Bioconductor.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "1r55ki951dj81qvy73knfcy69ik5vzkd56wnk3f6vvf9vngqb8jr"))))
    (build-system r-build-system)
    (properties '((upstream-name . "VariantAnnotation")))
    ;; NOTE(review): zlib is listed here while r-zlibbioc is also propagated
    ;; below -- confirm that both are needed.
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
7321
;; limma from Bioconductor; declares no inputs of its own.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.36.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "0iiifszr6hcqih6kszdsbkx3gacfg3d7v8hdx0lbjqnjqgqz7pwk"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
7340
;; XVector from Bioconductor.  A build phase strips the zlibbioc references
;; from the package metadata; zlib is supplied as an input instead.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "1zjlhh9lsyhg0js1858csyw2389kbrzdqnqnha833wazkwxilp3f"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove zlibbioc from the declared dependencies ...
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             ;; ... and from the namespace imports.
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7376
;; GenomicRanges from Bioconductor; `upstream-name' keeps the upstream
;; capitalization.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.32.6")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "0p58yk2i5gqvjlkx548mnrr49wvs0xfcl06l9rqj2hi6hkkbvnp3"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, a task that plays a central role when it
comes to analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7405
;; Biobase from Bioconductor; depends only on r-biocgenerics.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "1iwds2a5ir29k19dbpynlc7nn836cw2gamchhgpi2jf2xar9m9jz"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Biobase")))
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
7427
;; AnnotationDbi from Bioconductor.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.42.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "0afkbzli08vq02r2pr9phrz3rxd6ilp1w7yw8y99nbjiz14f8b1c"))))
    (build-system r-build-system)
    (properties '((upstream-name . "AnnotationDbi")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7454
;; biomaRt from Bioconductor; `upstream-name' keeps the upstream
;; capitalization.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.36.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "0b70s350ffc74v3xz5c3jpazr9zxdb7gjmjfj7aghlsrizrspill"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-httr" ,r-httr)
       ("r-progress" ,r-progress)
       ("r-rcurl" ,r-rcurl)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7487
;; BiocParallel from Bioconductor.  The license field is a list of two
;; licenses.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.14.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "1llb5a62hn4yxpdgqdh2l7i5zd06mjkk8hagsna69cq65wv6iifm"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BiocParallel")))
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
7512
;; Biostrings from Bioconductor.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "118b490jk87ydigm6ln25ms4kskzkw0akmh77clzznhzpqnxsi6j"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Biostrings")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
7538
;; Rsamtools from Bioconductor.  The zlibbioc references are removed from
;; the package metadata after unpacking, and zlib is given as an input.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.32.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "1hpjr22h33pf4fgv0sj83rqzv6l5l7s6fpmmqvchh45ikks1mnhq"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Rsamtools")))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Strip zlibbioc from the dependency list.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             ;; Strip the matching import directive.
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page
     "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
7582
;; DelayedArray from Bioconductor.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.6.5")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "10b03zrnvz5isfh4z55hasya2m71lrfx10l5lm2sdmqs0gwkanrd"))))
    (build-system r-build-system)
    (properties '((upstream-name . "DelayedArray")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
7613
;; SummarizedExperiment from Bioconductor.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "0v3zxl9cqsv79ag5cnvzlhvgaz5cr8f4rn7flmwnwpqd508cznl1"))))
    (build-system r-build-system)
    (properties '((upstream-name . "SummarizedExperiment")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
7644
;; GenomicAlignments from Bioconductor.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "00pi2cnkkbj2023fg2x2cmglkdalwzy1vr3glsikwz7ix9yylcaw"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomicAlignments")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
7677
;; rtracklayer from Bioconductor.  The zlibbioc references are removed from
;; the package metadata after unpacking, and zlib is given as an input.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.40.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "1wxxxlyps19dw3i0pw4mlm3kinnswsc35rgvlnbwvpnpjbca6w4l"))))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Strip zlibbioc from the dependency list.
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             ;; Strip the matching import directive.
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
7724
;; GenomicFeatures from Bioconductor.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.32.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "0kfyyg1ib8fkq2hxraal10z4bx3rg8figdskw4yhn1mbh6l42q5f"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomicFeatures")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
7765
;; GO.db, a Bioconductor annotation package.  Its URL is assembled by hand
;; and points at the "data/annotation" area of the release tree.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "02d1mn1al3q7qvhx1ylrr3ar4w4iw0qyi5d89v2336rzwk9maq35"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GO.db")))
    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
7789
;; graph from Bioconductor; depends only on r-biocgenerics.
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.58.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "graph" version))
       (sha256
        (base32 "1zx445lk36g1s6i5dbhhf00nzzazyklfjxxjfax6q8hnhvgm9759"))))
    (build-system r-build-system)
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description
     "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
7808
;; topGO from Bioconductor.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "05yxnkid8bgw9lkm90if9fg63djhgvbailfa3qsfqa5c0zjmixw1"))))
    (build-system r-build-system)
    (properties '((upstream-name . "topGO")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Upstream permits any version of the LGPL.
    (license license:lgpl2.1+)))
7841
;; BSgenome from Bioconductor.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "1rk2piqq5dppkd51ln3r872d7ng3rvq98071mnd0xdv2xwnyn5g8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
7871
;; Genome data package for Homo sapiens (1000genomes hs37d5); depends on
;; r-bsgenome and is marked as not substitutable.
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.1000genomes.hs37d5_"
                           version ".tar.gz"))
       (sha256
        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    (properties
     '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (license license:artistic2.0)))
7902
;; Bioconductor package "impute"; gfortran is listed as an input of the build.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "1d3cpfaqlq2gnb3hsc2yhxwkrnbd7m6ifif32yp0ya0jr5brl4hr"))))
    (build-system r-build-system)
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
7922
;; Bioconductor package "seqPattern", built with r-build-system.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0dw0yldfcf0ibvpqxlpx1ijnjf9lma47w9w22siszzhw09i0wp3w"))))
    ;; The upstream spelling differs in case from the Guix package name.
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
7949
;; Bioconductor package "genomation", built with r-build-system.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1vdmdyrq0n7pf8cvy2950v7hrcrcbd9zl4fg7dcmyly3iiwdyirp"))))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
7995
;; Experiment data package accompanying genomation; marked as not
;; substitutable.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/experiment/" rather than "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/experiment/src/contrib/"
                           "genomationData_" version ".tar.gz"))
       (sha256
        (base32 "0h7g5x3kyb50qlblz5hc85lfm6n6f5nb68i146way3ggs04sqvla"))))
    (build-system r-build-system)
    ;; The package is little more than large data files, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8023
;; Bioconductor annotation package "org.Hs.eg.db" (Human).
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Hs.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1v6wa5613cjq59xd7x1qz8lr9nb2abm9abl2cci1khrnrlpla927"))))
    (properties '((upstream-name . "org.Hs.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome wide annotation for Human")
    (description
     "This package contains genome-wide annotations for Human, primarily based
on mapping using Entrez Gene identifiers.")
    (license license:artistic2.0)))
8049
;; Bioconductor annotation package "org.Ce.eg.db" (Worm).
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Ce.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "02ggchixlmzywhsbr0h2ms4dravv7m5964cjxqcjxqs16vjwlbk9"))))
    (properties '((upstream-name . "org.Ce.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Ce.eg.db/")
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (license license:artistic2.0)))
8075
;; Bioconductor annotation package "org.Dm.eg.db" (Fly).
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Dm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "033qak1d3wwz17va0bh8z8p8arx0aw2va6gm1qfwsvdkj9cd9d7d"))))
    (properties '((upstream-name . "org.Dm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Dm.eg.db/")
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (license license:artistic2.0)))
8101
;; Bioconductor annotation package "org.Mm.eg.db" (Mouse).
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Mm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "11q21p3ki4bn4hb3aix0g775l45l66jmas6m94nfhqqnpjhv4d6g"))))
    (properties '((upstream-name . "org.Mm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Mm.eg.db/")
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (license license:artistic2.0)))
8127
;; Bioconductor package "seqLogo"; declares no inputs of its own.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqLogo" version))
       (sha256
        (base32 "16xvqcdknix9vjm8mrixi6nyfsr45jm844jh1x90m8044lwrsic1"))))
    ;; The upstream spelling differs in case from the Guix package name.
    (properties '((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8148
;; Genome data package for Homo sapiens (UCSC hg19); depends on r-bsgenome
;; and is marked as not substitutable.
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.UCSC.hg19_"
                           version ".tar.gz"))
       (sha256
        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (properties
     '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (license license:artistic2.0)))
8179
;; Genome data package for Mus musculus (UCSC mm9); depends on r-bsgenome
;; and is marked as not substitutable.
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm9_"
                           version ".tar.gz"))
       (sha256
        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (properties
     '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (license license:artistic2.0)))
8210
;; Genome data package for Mus musculus (UCSC mm10); depends on r-bsgenome
;; and is marked as not substitutable.
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm10_"
                           version ".tar.gz"))
       (sha256
        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (properties
     '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (license license:artistic2.0)))
8242
;; TxDb annotation package for Mouse (UCSC mm10, knownGene track); marked as
;; not substitutable.
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
    (properties
     '((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (license license:artistic2.0)))
8277
;; Genome data package for Caenorhabditis elegans (UCSC ce6); depends on
;; r-bsgenome and is marked as not substitutable.
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce6_"
                           version ".tar.gz"))
       (sha256
        (base32 "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (properties
     '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
8309
;; Genome data package for Caenorhabditis elegans (UCSC ce10); depends on
;; r-bsgenome and is marked as not substitutable.
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce10_"
                           version ".tar.gz"))
       (sha256
        (base32 "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (properties
     '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
8341
;; Genome data package for Drosophila melanogaster (UCSC dm3); depends on
;; r-bsgenome and is marked as not substitutable.
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm3_"
                           version ".tar.gz"))
       (sha256
        (base32 "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (properties
     '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (license license:artistic2.0)))
8373
;; Bioconductor package "motifRG", built with r-build-system.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32 "0mxhyidkyd2zqahdbg69y20r550z78lvr1r3pbjymnwfg4hcfq1p"))))
    ;; The upstream spelling differs in case from the Guix package name.
    (properties '((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8400
;; CRAN package "qtl", built with r-build-system.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.42-8")
    (source
     (origin
       (method url-fetch)
       ;; Use the cran-uri helper, as the other CRAN packages in this file
       ;; do, instead of assembling the mirror URL by hand.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32 "1l528dwvfpdlr05imrrm4rq32axp6hld9nqm6mm43kn5n7z2f5k6"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
8425
;; Bioconductor package "zlibbioc"; declares no inputs of its own.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "1rwr0mci8a712q0isavi4jmhm94gwivc4nr8j7r4kw05flp4g7gz"))))
    (properties '((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8444
;; R package "R4RNA", fetched as a tarball from e-rna.org and built with
;; r-build-system.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32 "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8467
;; Bioconductor package "Rhtslib"; uses pkg-config at build time, zlib as an
;; input, and propagates r-zlibbioc.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rhtslib" version))
       (sha256
        (base32 "16ywnb8cmr2xabd1i21b92rfziw7xfbv25yv16ipw617p41wa39z"))))
    ;; The upstream spelling differs in case from the Guix package name.
    (properties '((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8494
;; Bioconductor package "bamsignals"; takes zlib as an input and propagates
;; its R dependencies.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "bamsignals" version))
       (sha256
        (base32 "141q3p4lzwiqk1mfxi8q1q84axjl0gyiqg59xd3sp4viny4jqmgv"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8524
;; Bioconductor package "RCAS", built with r-build-system.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32 "0vmn7a0rm2ban0kaxrf5danhss2r4hfhnwh5889fjcgqy300fdd5"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-testthat" ,r-testthat)
       ;; knitr verifies that "pandoc-citeproc" can be found in PATH while
       ;; the vignettes are being built.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)))
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-knitr" ,r-knitr)
       ("r-motifrg" ,r-motifrg)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("r-topgo" ,r-topgo)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8580
;; Web interface for RCAS, built with gnu-build-system.  An extra phase wraps
;; the installed "bin/rcas-web" executable with Guile and R search paths.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                           "releases/download/v" version
                           "/rcas-web-" version ".tar.gz"))
       (sha256
        (base32 "0igz7jpcf7cm9800zcag6p3gd1i649figrhbdba6cjkm8f4gfspr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Runs after 'install: wrap the executable so that
         ;; GUILE_LOAD_PATH, GUILE_LOAD_COMPILED_PATH and R_LIBS_SITE are
         ;; set to fixed values when it is started.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix     (assoc-ref outputs "out"))
                    (json-dir   (assoc-ref inputs "guile-json"))
                    (redis-dir  (assoc-ref inputs "guile-redis"))
                    ;; Colon-separated Guile module directories of the
                    ;; guile-json and guile-redis inputs.
                    (guile-path (string-append
                                 json-dir "/share/guile/site/2.2:"
                                 redis-dir "/share/guile/site/2.2")))
               (wrap-program (string-append prefix "/bin/rcas-web")
                 `("GUILE_LOAD_PATH" ":" = (,guile-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,guile-path))
                 ;; R_LIBS_SITE is copied from the build environment.
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile-json)
       ("guile-redis" ,guile2.2-redis)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8624
;; Bioconductor package "MutationalPatterns", built with r-build-system.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MutationalPatterns" version))
       (sha256
        (base32 "1yq7351j42mjxn8fd3c5bdxzb2l5s4lvqhjdvv4rwj4f600n6wj9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The next two entries are suggested packages.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8662
;; CRAN package "WGCNA", fetched with cran-uri and built with r-build-system.
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.63")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "WGCNA" version))
       (sha256
        (base32 "1225dqm68bynkmklnsxdqdd3zqrpzbvqwyly8ibxmk75z33xz309"))))
    ;; The upstream spelling differs in case from the Guix package name.
    (properties '((upstream-name . "WGCNA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-doparallel" ,r-doparallel)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-fastcluster" ,r-fastcluster)
       ("r-foreach" ,r-foreach)
       ("r-go-db" ,r-go-db)
       ("r-hmisc" ,r-hmisc)
       ("r-impute" ,r-impute)
       ("r-rcpp" ,r-rcpp)
       ("r-robust" ,r-robust)
       ("r-survival" ,r-survival)
       ("r-matrixstats" ,r-matrixstats)
       ("r-preprocesscore" ,r-preprocesscore)))
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (license license:gpl2+)))
8701
;; R package "ChIPKernels", fetched from a pinned Git commit and built with
;; r-build-system.
(define-public r-chipkernels
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      ;; Version string: "1.1-", the revision, a dot, and the first nine
      ;; characters of the commit hash.
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32 "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8737
;; SeqGL is fetched as a release archive from GitHub; the downloaded file is
;; renamed so that it carries the package name and version.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ManuSetty/SeqGL/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8767
;; gkmSVM comes from CRAN; the mixed-case upstream name is kept in the
;; `upstream-name' property.
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.79.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gkmSVM" version))
              (sha256
               (base32
                "04dakbgfvfalz4rm4fvvybp506dn5fbj5g86ybfhrc6wywjllsz3"))))
    (properties `((upstream-name . "gkmSVM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://cran.r-project.org/web/packages/gkmSVM")
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (license license:gpl2+)))
8800
;; tximport is a Bioconductor package; this definition declares no inputs.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32
         "1qjc7ah9dzccpvcjrp9k4qnaz13x6kvy1c1xpdj503km6k528lip"))))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8822
;; rhdf5 is a Bioconductor package.  It propagates r-rhdf5lib and takes zlib
;; as a plain input.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rhdf5" version))
       (sha256
        (base32
         "15cmmchhk8bnp94gxg0zk9qyzdjx5kv16dzpbnb62mkq7ydmifx6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8849
;; AnnotationFilter is a Bioconductor package; its mixed-case upstream name
;; is kept in the `upstream-name' property.
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32
         "1w8ypfdz4g7vnwfrvnhjcpm8waciqyq2cn883ajdwg4vv7a5mj9a"))))
    (properties `((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8873
;; EMBOSS is built with the GNU build system.  Two files of the build
;; machinery are patched after unpacking, so the Autotools files are
;; regenerated in an extra `autogen' phase before configuring.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                                  (version-major+minor version) ".0/"
                                  "EMBOSS-" version ".tar.gz"))
              (sha256
               (base32
                "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Point the configure script at the libharu input.
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER tests check for the presence of libgd, libpng
             ;; and zlib, but assume that they are all found at the same
             ;; prefix.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; At build time there is no connection to the Internet, so
             ;; looking for updates will not work.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         (add-after 'disable-update-check 'autogen
           ;; `invoke' raises an error that names the failed command,
           ;; instead of the phase merely returning #f.
           (lambda _ (invoke "autoreconf" "-vif"))))))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8936
(define-public bits
  ;; Built from a pinned Git commit; see the note on the version below.
  (let ((commit "3cc4567896d9d6442923da944beb704750a08d2d")
        (revision "1"))
    (package
      (name "bits")
      ;; The version is 2.13.0 even though no release archives have been
      ;; published as yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Blank out Makefile text matching ".*_cuda", and strip the
               ;; trailing backslash after "bits_test_intersections".
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ program) program))
               #t))
           (replace 'install
             ;; Installation copies the "bin" directory into the output.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (copy-recursively "bin" (string-append out "/bin")))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8982
(define-public piranha
  ;; There is no release tarball for the latest version.  The latest commit is
  ;; older than one year at the time of this writing.
  (let ((commit "0466d364b71117d01e4471b74c514436cc281233")
        (revision "1"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'copy-smithlab-cpp
             ;; Copy every file of the "smithlab-cpp" input into the
             ;; source tree.
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((sources (find-files (assoc-ref inputs "smithlab-cpp"))))
                 (for-each (lambda (source)
                             (install-file source "./src/smithlab_cpp/"))
                           sources))
               #t))
           (add-after 'install 'install-to-store
             ;; Copy everything found under "bin" into the output's bin
             ;; directory.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           (find-files "bin" ".*")))
               #t)))
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools") "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Source checkout consumed by the `copy-smithlab-cpp' phase.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
9049
;; PePr is a Python 2 program distributed on PyPI.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Use the PyPI URL helper rather than a hand-written URL, as
              ;; other PyPI-hosted packages in this file do.
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
9078
(define-public filevercmp
  ;; Built from the GitHub archive of a pinned commit; the version string
  ;; embeds the first seven characters of that commit.
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             ;; Install the "filevercmp" executable into the output's bin
             ;; directory.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Return #t explicitly, like the other phases in this
                 ;; file; `install-file' has no useful return value.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
9106
;; MultiQC is distributed on PyPI.  Its setup.py is patched so that the
;; matplotlib requirement carries no version constraint.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32
                "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             ;; MultiQC 1.5 ‘requires’ a version of python-matplotlib older
             ;; than the one in Guix, but should work fine with 2.2.2.
             ;; See <https://github.com/ewels/MultiQC/issues/725> and
             ;; <https://github.com/ewels/MultiQC/issues/732> for details.
             (substitute* "setup.py"
               (("['\"]matplotlib.*?['\"]")
                "'matplotlib'"))
             #t)))))
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (home-page "http://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9152
;; chipseq is a Bioconductor package.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "09f8dgl5ni75qkf7nvvppwr3irpplv4xb3ks59ld7l8i2mplcrx7"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9178
;; CopyhelpeR is fetched through an explicit URL under the Bioconductor
;; "data/experiment" area rather than through `bioconductor-uri'.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://bioconductor.org/packages/release/"
                                  "data/experiment/src/contrib/CopyhelpeR_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9204
;; CopywriteR is a Bioconductor package; r-copyhelper is among its
;; propagated inputs.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "0b7xwq1va2zclb54f07c5ipmmq4iv2hrlph3j93jz5hmyayv50z3"))))
    (properties `((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9244
;; methylKit is a Bioconductor package.  zlib is a plain input; the R
;; dependencies are propagated.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32
         "1hr2czi5ybdf7hdmqsv39d17f3mvmw94wa38bc14zzm9mgy9gfy7"))))
    (properties `((upstream-name . "methylKit")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9290
;; sva is a Bioconductor package.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "sva" version))
              (sha256
               (base32
                "0a3jqbz0jp1jxrnjkqfpmca840yqcdwxprdl608bzzx2zb4jl52s"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9320
;; seqminer is fetched from CRAN; zlib is its only declared input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "6.1")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqminer" version))
              (sha256
               (base32
                "15yhg4vfc7jg1jnqb3371j00pgbmbyc9l1xx63hq1l3p34lazq2l"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Any version of the GPL is acceptable
    (license (list license:gpl2+ license:gpl3+))))
9342
;; RareMETALS2 is fetched from an explicit URL on genome.sph.umich.edu; its
;; upstream name is kept in the `upstream-name' property.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                  "b/b7/RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9374
;; MALDIquant is fetched from CRAN; this definition declares no inputs.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.17")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "MALDIquant" version))
              (sha256
               (base32
                "047s6007ydc38x8wm027mlb4mngz15n0d4238fr8h43wyll5zy0z"))))
    (properties `((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
9400
;; ProtGenerics is a Bioconductor package; this definition declares no
;; inputs.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "05jbadw2aiwy8vi2ia0jxg06cmwhly2cq4dy1ag7kdxf1c5i9ccn"))))
    (properties `((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9420
;; mzR is a Bioconductor package.  The "src/boost" directory is deleted from
;; the source by a snippet, and src/Makevars is patched to link against the
;; boost input instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzR" version))
              (sha256
               (base32
                "190m2bq5bqxhljaj4f7vz9wj5h5laaxd8zp5jampnql6xc4zmarg"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "src/boost")
                  #t))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             ;; Drop the references to ./boost/libs and add an ARCH_LIBS
             ;; line naming the boost libraries after "ARCH_OBJS=".
             (substitute* "src/Makevars"
               (("\\./boost/libs.*") "")
               (("ARCH_OBJS=" matched)
                (string-append matched
                               "\nARCH_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(("boost" ,boost) ; use this instead of the bundled boost sources
       ("netcdf" ,netcdf)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9472
;; affyio is a Bioconductor package.  It propagates r-zlibbioc and takes
;; zlib as a plain input.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.50.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "0sh5wnnchyfpq5n6rchbqmb27byn7kdzn5rgran6i39c81i5z22n"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9496
;; affy is a Bioconductor package; r-affyio is among its propagated inputs.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "0sxq875sigm21sf3qncrfrwfdz9nqw1vldxn3d3hj6aq64jg1ki6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/affy")
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9524
;; vsn is a Bioconductor package; r-affy is among its propagated inputs.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.48.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "vsn" version))
              (sha256
               (base32
                "0k6mah3g3zqbfap31xmvig4fn452a18xwwa5y0mfj5mj8588p57h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9558
;; Bioconductor package "mzID".  The upstream tarball name differs in case
;; from the Guix package name, hence the upstream-name property.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzID" version))
              (sha256
               (base32
                "060k0xlhg8q802c6zsb4b8ps0ccd9ybyaz0gnsvqkxb786i2vk40"))))
    (properties `((upstream-name . "mzID")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9588
;; Bioconductor package "pcaMethods"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.72.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pcaMethods" version))
              (sha256
               (base32
                "0v99yf8m7ryh6z0r3z0ggpqfnflcq5bn1q1i1cl9b7q4p6b4sa07"))))
    (properties `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9618
;; Bioconductor package "MSnbase"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.6.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnbase" version))
              (sha256
               (base32
                "0zrpx9r93q5ca2zdak5rs2m9sjm0wjdra1xfj3d3sx6p5gzfyg6n"))))
    (properties `((upstream-name . "MSnbase")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9661
;; Bioconductor package "MSnID"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32
         "172q5chi44104iz4y0g42wrimfp7hlhrfa8vzybx6m0ccrkkhl17"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    ;; Grammar fix: "assesses ... and optimizes" (was "optimize").
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9700
;; CRAN package "Seurat".  The tarball ships a pre-built jar, which the
;; source snippet deletes; the 'build-jar phase below rebuilds it from the
;; Java sources found under "java/".
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "2.3.2")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "Seurat" version))
              (sha256
               (base32
                "1sjpy5rrpvlpm6hs7qy7qpglgbp7zrgfybcsalpmjb51rhxhgcg1"))
              ;; Delete pre-built jar.
              (snippet
               '(begin (delete-file "inst/java/ModularityOptimizer.jar")
                       #t))))
    (properties `((upstream-name . "Seurat")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Compile the Java sources and package them as
         ;; inst/java/ModularityOptimizer.jar, the file removed by the
         ;; source snippet.
         (add-after 'unpack 'build-jar
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((classesdir "tmp-classes"))
               (setenv "JAVA_HOME" (assoc-ref inputs "jdk"))
               (mkdir classesdir)
               (with-output-to-file "manifest"
                 (lambda _
                   (display "Manifest-Version: 1.0\nMain-Class: ModularityOptimizer\n")))
               ;; 'invoke' raises an error naming the failing command on a
               ;; non-zero exit status, instead of silently yielding #f.
               (apply invoke "javac" "-d" classesdir
                      (find-files "java" "\\.java$"))
               (invoke "jar"
                       "-cmf" "manifest"
                       "inst/java/ModularityOptimizer.jar"
                       "-C" classesdir ".")
               #t))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-caret" ,r-caret)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-diffusionmap" ,r-diffusionmap)
       ("r-dosnow" ,r-dosnow)
       ("r-dplyr" ,r-dplyr)
       ("r-dtw" ,r-dtw)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-fnn" ,r-fnn)
       ("r-foreach" ,r-foreach)
       ("r-fpc" ,r-fpc)
       ("r-gdata" ,r-gdata)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggridges" ,r-ggridges)
       ("r-gplots" ,r-gplots)
       ("r-hdf5r" ,r-hdf5r)
       ("r-hmisc" ,r-hmisc)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-lars" ,r-lars)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-metap" ,r-metap)
       ("r-mixtools" ,r-mixtools)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-ranger" ,r-ranger)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reshape2" ,r-reshape2)
       ("r-reticulate" ,r-reticulate)
       ("r-rocr" ,r-rocr)
       ("r-rtsne" ,r-rtsne)
       ("r-sdmtools" ,r-sdmtools)
       ("r-stringr" ,r-stringr)
       ("r-tclust" ,r-tclust)
       ("r-tidyr" ,r-tidyr)
       ("r-tsne" ,r-tsne)
       ("r-vgam" ,r-vgam)))
    (home-page "http://www.satijalab.org/seurat")
    ;; The synopsis must not start with the package name.
    (synopsis "R toolkit for single cell genomics")
    (description
     "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
9794
;; Bioconductor package "aroma.light"; the Guix name replaces the dot with a
;; dash, and upstream-name keeps the original spelling.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "aroma.light" version))
              (sha256
               (base32
                "1dxsiwsrwcq9mj573f9vpdzrhagdqzal328ma8076px4gg6khxkn"))))
    (properties `((upstream-name . "aroma.light")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9821
;; Bioconductor package "DESeq"; upstream-name keeps the original
;; upper-case spelling of the tarball.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq" version))
              (sha256
               (base32
                "0ykxw8ksif026xy25wx50j2sdsrp156aqkmhcgfjkpgcw699glnm"))))
    (properties `((upstream-name . "DESeq")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www-huber.embl.de/users/anders/DESeq")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9852
;; Bioconductor package "EDASeq"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "EDASeq" version))
              (sha256
               (base32
                "1832pb3jkim4vrqzb8lajwx9r482bhww5n9nz3s6crvyamlp2dj0"))))
    (properties `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9890
;; Bioconductor package "interactiveDisplayBase"; upstream-name keeps the
;; original mixed-case spelling of the tarball.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "interactiveDisplayBase" version))
              (sha256
               (base32
                "05w58z3i9vkma4jd6rhjaxls4yiq4kwrppgcdq9xrr1pxp99k575"))))
    (properties `((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    ;; R package dependencies; both are propagated.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9914
;; Bioconductor package "AnnotationHub"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationHub" version))
              (sha256
               (base32
                "11gh7qkgazs90czdqgv74gh2hz26xrmdp6wsz9x5pygbxls8xdw3"))))
    (properties `((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-curl" ,r-curl)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9949
;; Bioconductor package "fastseg", built with the R build system.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "fastseg" version))
              (sha256
               (base32
                "1yw6hai6hb8qy7akdm4frfp6h4zy93zb68kdj094sanm7kgqmgik"))))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9979
;; Bioconductor package "KEGGREST"; upstream-name keeps the original
;; upper-case spelling of the tarball.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "1349vidgl9m10l1rbrp3pkwwgi2xcbsw9h9z2xqbvg97lmqc4r8j"))))
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; Dropped the redundant "a package that provides" from the description.
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
10003
;; Bioconductor package "gage", built with the R build system.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32
         "0j3cqxy97lpf146wkmdfaq9680gicmzxvhp6w5pxq3j7ipiy7262"))))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page "http://www.biomedcentral.com/1471-2105/10/161")
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    ;; Typo fix: "functions" (was "funtions").
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
10032
;; Bioconductor package "GenomicFiles"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicFiles" version))
              (sha256
               (base32
                "0bhsq5czigrjyl9gkb2kpkpl367b3ac5g8s280adkcxggn9g7sxq"))))
    (properties `((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
10065
;; Bioconductor package "ComplexHeatmap"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "1.18.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ComplexHeatmap" version))
              (sha256
               (base32
                "0qjwz1hzpjnc90jiinjkikfnr0shi72q3zfdjjz7pxydy0mglq8n"))))
    (properties `((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10095
;; Bioconductor package "DirichletMultinomial"; upstream-name keeps the
;; original mixed-case spelling of the tarball.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DirichletMultinomial" version))
              (sha256
               (base32
                "0vcyp81b90in4ls5nbadc66cw2g9aydr94aqifq5j4b7diq74yfs"))))
    (properties `((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    ;; gsl is a plain input; unlike the R packages below it is not propagated.
    (inputs
     `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10124
;; Bioconductor package "ensembldb", built with the R build system.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.4.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ensembldb" version))
              (sha256
               (base32
                "1l2b4cxiycv05mz4z4f3dhx57r9ksha02psc114h30ldm5rxz8w6"))))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotation for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data is
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieve all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
allowing to retrieve annotations for specific entries like genes encoded on a
chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
10167
;; Bioconductor package "OrganismDbi"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32
         "0hb9ni41bjfy5s5ryw2qmqs2sx3i7j47w1g0l8g1pvn7ppnxb6cv"))))
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; Grammar fix: "a select method" (was "a select methods").
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10199
;; Bioconductor package "biovizBase"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biovizBase" version))
              (sha256
               (base32
                "0lkiqdr3ics6hgv47lwkykcy761823bbkhffbn4ykyfzyqwl4p67"))))
    (properties `((upstream-name . "biovizBase")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10240
;; Bioconductor package "ggbio", built with the R build system.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ggbio" version))
              (sha256
               (base32
                "0wszh3w8yia5zw758h837i1q35k99sn444y2hahcxqbdmmlbf7in"))))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10292
;; CRAN package "gProfileR" (fetched via cran-uri); upstream-name keeps the
;; original mixed-case spelling of the tarball.
(define-public r-gprofiler
  (package
    (name "r-gprofiler")
    (version "0.6.6")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gProfileR" version))
              (sha256
               (base32
                "1n6cj12j102b4x9vhyl4dljp1i0r43p23cnhqbx4als2xfxdlqgi"))))
    (properties `((upstream-name . "gProfileR")))
    (build-system r-build-system)
    ;; R package dependencies; both are propagated.
    (propagated-inputs
     `(("r-plyr" ,r-plyr)
       ("r-rcurl" ,r-rcurl)))
    (home-page "https://cran.r-project.org/web/packages/gProfileR/")
    (synopsis "Interface to the g:Profiler toolkit")
    (description
     "This package provides tools for functional enrichment analysis,
gene identifier conversion and mapping homologous genes across related
organisms via the @code{g:Profiler} toolkit.")
    (license license:gpl2+)))
10316
;; Bioconductor package "gQTLBase"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLBase" version))
              (sha256
               (base32
                "1m3ajpqjhw1nwwsn372r44xfxq0a9a0pzsnrprzdjp6mh52p9b5m"))))
    (properties `((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10351
;; Bioconductor package "snpStats"; upstream-name keeps the original
;; mixed-case spelling of the tarball.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "snpStats" version))
              (sha256
               (base32
                "0iydgfnm053iw860qa1bbh4f6nwzlsf3vhgq92gvl2v4xsz1jbbs"))))
    (properties `((upstream-name . "snpStats")))
    (build-system r-build-system)
    ;; zlib is a plain input; unlike the R packages below it is not propagated.
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10378
;; Bioconductor annotation package "Homo.sapiens"; the Guix name replaces
;; the dot with a dash, and upstream-name keeps the original spelling.
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; Fetch over HTTPS rather than plain HTTP.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "Homo.sapiens_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    (properties
     `((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    ;; R package dependencies; all of them are propagated.
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene" ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10410
;; Bioconductor package "erma": selected elements of the epigenomics road map.
(define-public r-erma
  (package
    (name "r-erma")
    (version "0.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "erma" version))
              (sha256
               (base32
                "1ka68n18yizlyvb8bpwwcl4hqbsasg8hw8jb3vgy3cd4szji87hh"))))
    (build-system r-build-system)
    ;; Propagated R dependencies, kept in alphabetical order.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10448
;; Bioconductor package "ldblock": data structures for linkage disequilibrium
;; measures.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ldblock" version))
              (sha256
               (base32
                "0c24zvnwsp39d3q0bps13sc441jj9ms2zi34xsb8c392lqmbypvd"))))
    (build-system r-build-system)
    ;; Propagated R dependencies.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-erma" ,r-erma)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-go-db" ,r-go-db)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10480
;; Bioconductor package "gQTLstats" (mixed-case upstream name, hence the
;; upstream-name property).
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLstats" version))
              (sha256
               (base32
                "19g8qhfgngdc14cw9k4i44cxhs3qva87x56gjzmn25k1yj8qgsp1"))))
    (properties '((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    ;; Propagated R dependencies, kept in alphabetical order.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10534
;; Bioconductor package "Gviz": plotting of data and annotation along genomic
;; coordinates.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Gviz" version))
       (sha256
        (base32
         "1fhli7ahkl5r43j0hc89ib41mfadj6qyrg36i03ncz8zs6iqwpx4"))))
    ;; The upstream name is capitalized.
    (properties `((upstream-name . "Gviz")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    ;; Subject/verb agreement fixed: "analysis requires", not "analyses
    ;; requires".
    (description
     "Genomic data analysis requires integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10580
;; Bioconductor package "gwascat": tools for EMBL-EBI GWAS catalog data.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gwascat" version))
              (sha256
               (base32
                "08ba9il4vbjjwlbwmqg4ai6ya1p09js9agn95sw0dhc9gqln42hx"))))
    (build-system r-build-system)
    ;; Propagated R dependencies, kept in alphabetical order.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbio" ,r-ggbio)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlstats" ,r-gqtlstats)
       ("r-graph" ,r-graph)
       ("r-gviz" ,r-gviz)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10620
;; Bioconductor package "Sushi": genomic visualizations for multi-panel
;; figures.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32
         "1m15hmg4k0qhshyn65xfj5hx7xbaf0kxqw70lxisak6pj1w00l41"))))
    ;; The upstream name is capitalized.
    (properties '((upstream-name . "Sushi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10642
;; Bioconductor package "FitHiC": confidence estimates for intra-chromosomal
;; contact maps.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32
         "06w4q836bi1mvkbl1saghv4r5p4hxpjg8cp7kgad13ls450kqmyd"))))
    ;; The upstream name is mixed-case.
    (properties '((upstream-name . "FitHiC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10666
;; Bioconductor package "HiTC": analysis of high-throughput chromosome
;; conformation capture data.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32
         "0qkk5139f51lwwy1yh7nbkflh5d69prirmhniwam34nlg9rzjm2z"))))
    ;; The upstream name is mixed-case.
    (properties '((upstream-name . "HiTC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10695
;; Bioconductor package "qvalue": q-value and local FDR estimation from a list
;; of p-values.
(define-public r-qvalue
  (package
    (name "r-qvalue")
    (version "2.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "qvalue" version))
       (sha256
        (base32
         "1ndwkj0hh7v4lwylq1v0fkxqs7mfmbcj8kxbdpj1wkvf131z2ns8"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-reshape2" ,r-reshape2)))
    ;; Use the HTTPS form of the project URL.
    (home-page "https://github.com/jdstorey/qvalue")
    (synopsis "Q-value estimation for false discovery rate control")
    (description
     "This package takes a list of p-values resulting from the simultaneous
testing of many hypotheses and estimates their q-values and local @dfn{false
discovery rate} (FDR) values. The q-value of a test measures the proportion
of false positives incurred when that particular test is called significant.
The local FDR measures the posterior probability the null hypothesis is true
given the test's p-value. Various plots are automatically generated, allowing
one to make sensible significance cut-offs. The software can be applied to
problems in genomics, brain imaging, astrophysics, and data mining.")
    ;; Any version of the LGPL.
    (license license:lgpl3+)))
10724
;; Bioconductor package "HDF5Array": array-like container for HDF5 datasets.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.8.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "HDF5Array" version))
              (sha256
               (base32
                "1l0276qxkhgdxsfck3jmi8jvnsr20g10gjki53g0mqa45wnhm3ck"))))
    ;; The upstream name is mixed-case.
    (properties '((upstream-name . "HDF5Array")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-rhdf5" ,r-rhdf5)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10750
;; Bioconductor package "Rhdf5lib".  A custom phase removes the package's
;; configure scripts, unpacks the hdf5 source tarball under "src", and points
;; the Makevars at the static libraries of the "hdf5" input.  References to
;; libsz are patched out.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhdf5lib" version))
              (sha256
               (base32
                "1y59acac6v8hrhv84gghn9ifsni9xxxacaj177rrl4frmkrz4x3c"))))
    (properties '((upstream-name . "Rhdf5lib")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (for-each delete-file '("configure" "configure.ac"))
             ;; Do not make other packages link with the proprietary libsz.
             (substitute* "R/zzz.R"
               (("'%s/libhdf5_cpp.a %s/libhdf5.a %s/libsz.a -lz'")
                "'%s/libhdf5_cpp.a %s/libhdf5.a %s/libhdf5.a -lz'")
               (("'%s/libhdf5.a %s/libsz.a -lz'")
                "'%s/libhdf5.a %s/libhdf5.a -lz'"))
             (with-directory-excursion "src"
               ;; Unpack the hdf5 sources and give the directory a
               ;; version-independent name.
               (invoke "tar" "xvf" (assoc-ref inputs "hdf5-source"))
               (rename-file (string-append "hdf5-" ,(package-version hdf5))
                            "hdf5")
               ;; Remove timestamp and host system information to make
               ;; the build reproducible.
               (substitute* "hdf5/src/libhdf5.settings.in"
                 (("Configured on: @CONFIG_DATE@")
                  "Configured on: Guix")
                 (("Uname information:.*")
                  "Uname information: Linux\n")
                 ;; Remove unnecessary store reference.
                 (("C Compiler:.*")
                  "C Compiler: GCC\n"))
               (rename-file "Makevars.in" "Makevars")
               ;; Directory of the "hdf5" input, used for both library paths.
               (let ((hdf5-dir (assoc-ref inputs "hdf5")))
                 (substitute* "Makevars"
                   (("HDF5_CXX_LIB=.*")
                    (string-append "HDF5_CXX_LIB="
                                   hdf5-dir "/lib/libhdf5_cpp.a\n"))
                   (("HDF5_LIB=.*")
                    (string-append "HDF5_LIB="
                                   hdf5-dir "/lib/libhdf5.a\n"))
                   (("HDF5_CXX_INCLUDE=.*") "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                   (("HDF5_INCLUDE=.*") "HDF5_INCLUDE=./hdf5/src\n")
                   ;; szip is non-free software
                   (("cp \\$\\{SZIP_LIB\\}.*") "")
                   (("\\$\\{USER_LIB_DIR\\}libsz.a") ""))))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("hdf5" ,hdf5)))
    (native-inputs
     `(("hdf5-source" ,(package-source hdf5))))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10815
;; Bioconductor package "beachmat": C++ class interface for several matrix
;; types.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "beachmat" version))
              (sha256
               (base32
                "1w90v0jx1zgrfxzx99gdkk0dz2vi25hr51jml1bvq33i64rj7996"))))
    (build-system r-build-system)
    ;; Non-R libraries.
    (inputs
     `(("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    ;; Propagated R dependencies.
    (propagated-inputs
     `(("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10843
;; Bioconductor package "SingleCellExperiment": S4 class for single-cell data.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "SingleCellExperiment" version))
              (sha256
               (base32
                "0mz3chia250v8v6q8r5cqv5fc4bpcw1hhrfr3p7l5i4xi85scpka"))))
    ;; The upstream name is mixed-case.
    (properties '((upstream-name . "SingleCellExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10870
;; Bioconductor package "scater": single-cell RNA-seq analysis tools focused
;; on quality control.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scater" version))
       (sha256
        (base32
         "0bhpikgz3b9f510dawsay4zry9rlp8vjx5n6zvwbcpwrd94p3903"))))
    (build-system r-build-system)
    ;; Propagated R dependencies, kept in alphabetical order.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-edger" ,r-edger)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-rjson" ,r-rjson)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-shinydashboard" ,r-shinydashboard)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-tximport" ,r-tximport)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
10914
;; Bioconductor package "scran": low-level analyses of single-cell RNA-seq
;; data.
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.8.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "scran" version))
              (sha256
               (base32
                "0nbn5x75gf9d0p18w7vpkbv30cpdqvp5bz8xvila0h7jla7xdyih"))))
    (build-system r-build-system)
    ;; Propagated R dependencies, kept in alphabetical order.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dt" ,r-dt)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-edger" ,r-edger)
       ("r-fnn" ,r-fnn)
       ("r-ggplot2" ,r-ggplot2)
       ("r-igraph" ,r-igraph)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-shiny" ,r-shiny)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
10957
;; Bioconductor package "DelayedMatrixStats": the matrixStats API for
;; DelayedMatrix objects.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedMatrixStats" version))
       (sha256
        (base32
         "1dasghfy8x27zzmd0igag4mc1gxxxbchsl4hpc1050dj3wnw9w3y"))))
    ;; The upstream name is mixed-case.
    (properties
     `((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    ;; Subject/verb agreement fixed in the last sentence: "are minimized".
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time are minimized.")
    (license license:expat)))
10989
;; CRAN package "phangorn": phylogenetic analysis in R.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              ;; Unlike its neighbours this one comes from CRAN.
              (uri (cran-uri "phangorn" version))
              (sha256
               (base32
                "0xc8k552nxczy19jr0xjjagrzc8x6lafasgk2c099ls8bc1yml1i"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
11017
;; dropbead, built from a pinned git commit.  The version string is "0-"
;; followed by the revision and the first seven characters of the commit.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      (version (string-append "0-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/rajewsky-lab/dropbead.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      ;; Propagated R dependencies.
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
11047
;; Private htslib variant used as the "htslib" input of sambamba.  It inherits
;; from htslib, replaces the source with a pinned commit of lomereiter's fork,
;; and adds autoconf and automake to the native inputs.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/lomereiter/htslib.git")
                      (commit commit)))
                (file-name (string-append "htslib-" version "-checkout"))
                (sha256
                 (base32
                  "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      ;; The autotools come first, followed by htslib's own native inputs.
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
11068
;; Sambamba, built from a git checkout with the D tool chain.  The BioD and
;; undeaD sources are copied into the build tree, and the Makefile is patched
;; to link against the packaged htslib and lz4 instead of bundled copies.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.7-10-g223fa20")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lomereiter/sambamba.git")
             (commit (string-append "v" version))))
       ;; Same name as before, via the helper used by other checkouts here.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zb9hrxglxqh13ava9wwri30cvf85hjnbn8ccnr8l60a3k5avczn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             ;; substitute* matches each line together with its trailing
             ;; newline, and ".*" consumes that newline.  Put it back so the
             ;; shebang is not glued to the line that follows it.
             (substitute* "gen_ldc_version_info.py"
               (("/usr/bin/env.*")
                (string-append (which "python") "\n")))
             (substitute* "Makefile"
               (("\\$\\(shell which ldmd2\\)") (which "ldmd2")))
             #t))
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             (copy-recursively (assoc-ref inputs "undead") "undeaD")
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link against the htslib and lz4 inputs and drop the Makefile
             ;; prerequisites that would build the bundled copies.
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" htslib-static lz4-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               ;; install-file creates BIN when it does not exist yet.
               (install-file "build/sambamba" bin)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python2-minimal)
       ;; Source checkouts copied into the tree by 'place-biod-and-undead.
       ("biod"
        ,(let ((commit "c778e4f2d8bacea7499283ce39f5577b232732c6"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1z90562hg47i63gx042wb3ak2vqjg5z7hwgn9bp2pdxfg3nxrw37")))))
       ("undead"
        ,(let ((commit "92803d25c88657e945511f0976a0c79d8da46e89"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/dlang/undeaD.git")
                   (commit commit)))
             (file-name (string-append "undead-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "0vq6n81vzqvgphjw54lz2isc1j8lcxwjdbrhqz1h5gwrvw9w5138")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "https://lomereiter.github.io/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
11157
;; Ritornello, a ChIP-seq peak caller.  There is no configure step; the build
;; patches the samtools header references and installs one binary by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/KlugerLab/"
                           "Ritornello/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "02nik86gq9ljjriv6pamwlmqnfky3ads1fpklx6mc3hx6k40pg38"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             ;; Rewrite the <sam.h> includes to <samtools/sam.h>.
             (substitute* '("src/SamStream.h"
                            "src/BufferedGenomeReader.h")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (install-file "bin/Ritornello" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
11202
;; Trim Galore!, a single script taken from a git checkout.  There is nothing
;; to configure or build; the script is patched to call cutadapt, gzip and
;; gunzip by absolute file name and then copied to "bin".
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.4.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore.git")
             (commit version)))
       ;; Same name as before, via the helper used by other checkouts here.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0x5892l48c816pf00wmnz5vq0zq6170d3xc8zrxncd4jcz7h1p71"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "trim_galore"
               (("\\$path_to_cutadapt = 'cutadapt'")
                (string-append "$path_to_cutadapt = '"
                               (assoc-ref inputs "cutadapt")
                               "/bin/cutadapt'"))
               (("\\| gzip")
                (string-append "| "
                               (assoc-ref inputs "gzip")
                               "/bin/gzip"))
               (("\"gunzip")
                (string-append "\""
                               (assoc-ref inputs "gzip")
                               "/bin/gunzip")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               ;; install-file creates BIN when it does not exist yet.
               (install-file "trim_galore" bin)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("cutadapt" ,cutadapt)))
    ;; NOTE(review): the source is a git checkout, so unzip looks like a
    ;; leftover from an archive-based source; confirm before removing.
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
11259
;; GESS, a Python 2 program with nothing to configure or build.  The install
;; phase copies the whole source tree into a site-packages directory, turns
;; GESS.py into a wrapped executable script and links it from "bin".
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    ;; Take the "X.Y" directory component from the python-2
                    ;; package instead of hard-coding "2.7".
                    (target (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-program (string-append target "GESS.py")
                 `("PYTHONPATH" ":" prefix (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)))
    (home-page "http://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
11320
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       ;; The "install" make target is passed along with the makefile name.
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; All remaining phases run from within the "src" directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (delete 'configure)
         (replace 'install
           ;; Copy every file found under "../exe" to the bin directory of
           ;; the output.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "../exe" ".*"))
               #t))))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
11357
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       ;; The version appears twice in the download URL: once as a
       ;; directory and once in the tarball name.
       (uri (string-append "https://integrativemodeling.org/" version
                           "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     ;; FIXME: Some tests fail because they produce warnings, others fail
     ;; because the PYTHONPATH does not include the modeller's directory.
     `(#:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; Most of IMP is under the GNU Lesser GPL (full text in the file
    ;; COPYING.LGPL).  Some IMP modules are under the GNU GPL instead (see
    ;; the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
11401
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/3DGenomes/TADbit/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1cnfqrl4685zar4nnw94j94nhvl2h29jm448nadqi1h05z6fdk4f"))))
    (build-system python-build-system)
    (arguments
     `(;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completion-dir
                    (string-append (assoc-ref outputs "out")
                                   "/etc/bash_completion.d")))
               ;; setup.py opens these files for writing
               (for-each (lambda (file)
                           (chmod file #o664))
                         '("_pytadbit/_version.py"
                           "README.rst"))

               ;; Don't attempt to install the bash completions to
               ;; the home directory.  The completion file is renamed
               ;; and setup.py is pointed at the output directory
               ;; instead.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completion-dir "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
11459
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ENCODE-DCC/kentUtils/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "134aja3k1cj32kbk1nnw0q9gxjb2krr15q6sga8qldzvc0585rmm"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let* ((free (list "." ".."
                              "utils" "lib" "inc" "tagStorm"
                              "parasol" "htslib"))
                  (not-free? (lambda (entry)
                               (not (member entry free))))
                  (directory? (lambda (file)
                                (eq? 'directory (stat:type (stat file)))))
                  ;; Entries of "src" that are not on the whitelist,
                  ;; as paths relative to the source root.
                  (candidates (map (cut string-append "src/" <>)
                                   (scandir "src" not-free?))))
             ;; Plain files directly under "src" are left alone; only
             ;; directories are removed.
             (for-each (lambda (path)
                         (when (directory? path)
                           (delete-file-recursively path)))
                       candidates))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Replace the hard-coded /bin/echo with the echo found on PATH.
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         ;; Copy the "samtabix" input into a "samtabix" directory of the
         ;; build tree.
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((samtabix (assoc-ref inputs "samtabix")))
               (copy-recursively samtabix "samtabix"))
             #t))
         (delete 'configure)
         ;; Install the contents of the build tree's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "bin" bin))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb" ,mariadb)
       ("openssl" ,openssl)))
    (home-page "http://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
11572
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; The version is "1.1-", the revision, a dot, and the first seven
      ;; characters of the commit.
      (version (string-append "1.1-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (doc (string-append out "/share/doc/f-seq"))
                      (lib (string-append out "/lib"))
                      (commons-cli
                       (string-append (assoc-ref inputs "java-commons-cli")
                                      "/share/java/commons-cli.jar")))
                 (mkdir-p out)
                 (mkdir-p doc)
                 ;; Patch the launcher script: use the java found on PATH,
                 ;; use the commons-cli jar from the input instead of the
                 ;; bundled one, and fix REALDIR to the installed bin
                 ;; directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    commons-cli)
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bin "\n")))
                 (install-file "README.txt" doc)
                 (install-file "bin/linux/fseq" bin)
                 (install-file "build~/fseq.jar" lib)
                 (copy-recursively "lib" lib)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
11631
(define-public bismark
  (package
    (name "bismark")
    (version "0.19.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "0yb5l36slwg02fp4b1jdlplgljcsxgqfzvzihzdnphd87dghcc84"))
       (snippet
        '(begin
           ;; highcharts.js is non-free software.  The code is available under
           ;; CC-BY-NC or proprietary licenses only.
           (delete-file "bismark_sitrep/highcharts.js")
           #t))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin     (string-append out "/bin"))
                    (share   (string-append out "/share/bismark"))
                    (docdir  (string-append out "/share/doc/bismark"))
                    (sitrep  (string-append share "/bismark_sitrep"))
                    (gunzip  (string-append (assoc-ref inputs "gzip")
                                            "/bin/gunzip"))
                    (docs    '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary")))
               ;; Point bismark2report at the installed location of the
               ;; report templates before the script is installed.
               (substitute* "bismark2report"
                 (("\\$RealBin/bismark_sitrep") sitrep))
               (for-each mkdir-p (list share docdir bin))
               (for-each (lambda (script)
                           (install-file script bin))
                         scripts)
               (for-each (lambda (doc)
                           (install-file doc docdir))
                         docs)
               (copy-recursively "Docs/Images"
                                 (string-append docdir "/Images"))
               (copy-recursively "bismark_sitrep" sitrep)

               ;; Fix references to gunzip
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" gunzip " -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
11717
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; Instead of running a configure script, patch the hard-coded
         ;; shell path and enter "src" for the remaining phases.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         ;; Install the listed executables from "src" and copy the
         ;; top-level "doc" directory to the documentation directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin    (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11765
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.43.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pachterlab/"
                           "kallisto/archive/v" version ".tar.gz"))
       ;; Store the tarball as NAME-VERSION.tar.gz.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "03j3iqhvq7ya3c91gidly3k3jvgm97vjq4scihrlxh315j696r11"))))
    (build-system cmake-build-system)
    (arguments
     ;; no "check" target
     `(#:tests? #f))
    (inputs
     `(("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11794
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/Kingsford-Group/"
             "libgff/archive/v" version ".tar.gz"))
       ;; Store the tarball as NAME-VERSION.tar.gz.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0vc4nxyhlm6g9vvmx5l4lfs5pnvixsv1hiiy4kddf2y3p6jna8ls"))))
    (build-system cmake-build-system)
    (arguments
     ;; no tests included
     `(#:tests? #f))
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
11817
(define-public libdivsufsort
  (package
    (name "libdivsufsort")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       ;; The upstream tag is the bare version string.
       (uri (git-reference
             (url "https://github.com/y-256/libdivsufsort.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0fgdz9fzihlvjjrxy01md1bv9vh12rkgkwbm90b1hj5xpbaqp7z2"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; there are no tests
       ;; Needed for rapmap and sailfish.
       #:configure-flags '("-DBUILD_DIVSUFSORT64=ON")))
    (home-page "https://github.com/y-256/libdivsufsort")
    (synopsis "Lightweight suffix-sorting library")
    (description "libdivsufsort is a software library that implements a
lightweight suffix array construction algorithm. This library provides a
simple and an efficient C API to construct a suffix array and a
Burrows-Wheeler transformed string from a given string over a constant-size
alphabet. The algorithm runs in O(n log n) worst-case time using only 5n+O(1)
bytes of memory space, where n is the length of the string.")
    (license license:expat)))
11846
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "https://github.com/kingsfordgroup/"
                              "sailfish/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1inn60dxiwsz8g9w7kvfhjxj4bwfb0r12dyhpzzhfbig712dkmm0"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         ;; Unpack the "rapmap" input to a scratch directory and copy its
         ;; C/C++ sources and headers to the places where the build looks
         ;; for the RapMap it would otherwise download.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p "/tmp/rapmap")
               ;; Use 'invoke' so that a failing tar aborts the phase
               ;; right away instead of being silently ignored.
               (invoke "tar" "xf" rapmap
                       "-C" "/tmp/rapmap"
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
               (copy-recursively "/tmp/rapmap/include" include)
               #t)))
         ;; Replace references to bundled jellyfish and libdivsufsort with
         ;; the corresponding inputs and disable the ExternalProject
         ;; downloads.
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("src/SailfishIndexer.cpp"
                            "src/SailfishUtils.cpp"
                            "src/SailfishQuantify.cpp"
                            "src/FASTAParser.cpp"
                            "include/PCA.hpp"
                            "include/SailfishUtils.hpp"
                            "include/SailfishIndex.hpp"
                            "include/CollapsedEMOptimizer.hpp"
                            "src/CollapsedEMOptimizer.cpp")
               (("#include \"jellyfish/config.h\"") ""))
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (string-append (assoc-ref inputs "jellyfish")
                               "/include/jellyfish-" ,(package-version jellyfish)))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (string-append (assoc-ref inputs "jellyfish")
                               "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so")))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("find_package\\(Jellyfish.*") "")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libsparsehash") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message("))

             ;; Ensure that Eigen headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (getenv "CPLUS_INCLUDE_PATH")
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://www.cs.cmu.edu/~ckingsf/software/sailfish")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
11993
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       ;; Fetched from the COMBINE-lab repository; tags are the version
       ;; prefixed with "v".
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib.git")
             (commit (string-append "v" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments
     ;; not supported
     '(#:parallel-tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     ;; for tests
     `(("perl" ,perl)))
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
12034
(define spdlog-for-salmon
  (package
    (name "spdlog")
    (version "0.14.0")
    (source
     (origin
       (method git-fetch)
       ;; Fetched from the COMBINE-lab repository; tags are the version
       ;; prefixed with "v".
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/spdlog.git")
             (commit (string-append "v" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "13730429gwlabi432ilpnja3sfvy0nn2719vnhhmii34xcdyc57q"))))
    (build-system cmake-build-system)
    (home-page "https://github.com/COMBINE-lab/spdlog")
    (synopsis "Very fast C++ logging library")
    (description "Spdlog is a very fast header-only C++ logging library with
performance as its primary goal.")
    (license license:expat)))
12054
;; A variant of bwa, modified for use with Salmon.  Unlike the regular
;; package it also installs a library (and headers), so that Salmon does
;; not have to build bwa itself.
(define bwa-for-salmon
  (package (inherit bwa)
    (name "bwa")
    (version "0.7.12.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/bwa.git")
             (commit (string-append "v" version))))
       (file-name (string-append "bwa-for-salmon-" version "-checkout"))
       (sha256
        (base32
         "1z2qa64y0c5hky10510x137mnzlhz6k8qf27csw4w9j6qihq95gb"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (lib (string-append out "/lib"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1"))
                    (inc (string-append out "/include/bwa")))
               ;; The library and header directories are created even if
               ;; nothing ends up being copied into them.
               (mkdir-p lib)
               (mkdir-p inc)
               ;; Each pair is (FILE . DESTINATION-DIRECTORY).
               (for-each (lambda (entry)
                           (install-file (car entry) (cdr entry)))
                         (list (cons "bwa" bin)
                               (cons "README.md" doc)
                               (cons "bwa.1" man)
                               (cons "libbwa.a" lib)))
               ;; Install every header found in the build tree.
               (for-each (lambda (header)
                           (install-file header inc))
                         (find-files "." "\\.h$")))
             #t)))))))
12095
;; Salmon, built against the libraries supplied as inputs.  The bundled
;; eigen3 headers are deleted from the source, upstream's ExternalProject_Add
;; targets are neutralised, and library paths in the CMake files are rewritten
;; to point at the corresponding inputs.
(define-public salmon
  (package
    (name "salmon")
    (version "0.9.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/salmon.git")
                    (commit (string-append "v" version))))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1zi1ff4i7y2ykk0vdzysgwzzzv166vg2x77pj1mf4baclavxj87a"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       ;; Boost detection is disabled in the 'do-not-look-for-boost phase
       ;; below, so the Boost locations and libraries are passed by hand.
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             "-DTBB_LIBRARIES=tbb tbbmalloc"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.  Turn the
         ;; start of the find_package call into a CMake comment marker.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             ;; Return #t explicitly, like the other phases of this package.
             #t))
         ;; Replace every call to getVersionMessage() with an empty string
         ;; literal.  NOTE(review): presumably that function contacts an
         ;; upstream server, as the phase name suggests -- confirm.
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         ;; Copy the RapMap sources and headers from the "rapmap" input into
         ;; the locations under external/install, then delete a fixed set of
         ;; the copied files.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files (string-append rapmap "/src") "\\.(c|cpp)"))
               (copy-recursively (string-append rapmap "/include") include)
               (for-each delete-file '("external/install/include/rapmap/xxhash.h"
                                       "external/install/include/rapmap/FastxParser.hpp"
                                       "external/install/include/rapmap/concurrentqueue.h"
                                       "external/install/include/rapmap/FastxParserThreadUtils.hpp"
                                       "external/install/src/rapmap/FastxParser.cpp"
                                       "external/install/src/rapmap/xxhash.c"))
               ;; `for-each' returns an unspecified value; signal success.
               #t)))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the references to libraries under external/install at
             ;; the corresponding package inputs.
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (string-append (assoc-ref inputs "jellyfish")
                               "/include/jellyfish-" ,(package-version jellyfish)))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (string-append (assoc-ref inputs "jellyfish")
                               "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                (string-append (assoc-ref inputs "libstadenio-for-salmon")
                               "/lib/libstaden-read.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libbwa.a")
                (string-append (assoc-ref inputs "bwa") "/lib/libbwa.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so")))
             ;; Turn the ExternalProject declarations into harmless
             ;; message(...) calls so that none of them is acted upon.
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               (("find_package\\(Jellyfish.*") "")
               (("ExternalProject_Add\\(libcereal") "message(")
               (("ExternalProject_Add\\(libbwa") "message(")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libtbb") "message(")
               (("ExternalProject_Add\\(libspdlog") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message(")
               (("ExternalProject_Add\\(libstadenio") "message(")
               (("ExternalProject_Add_Step\\(") "message("))

             ;; Ensure that all headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (getenv "CPLUS_INCLUDE_PATH")
                                    ":"
                                    (assoc-ref inputs "bwa")
                                    "/include/bwa"
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             (setenv "CPATH"
                     (string-append (assoc-ref inputs "bwa")
                                    "/include/bwa"
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bwa" ,bwa-for-salmon)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ;; RapMap checkout tagged for this salmon release; consumed by the
       ;; 'prepare-rapmap phase.
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "salmon-v" version))))
                    (file-name (string-append "rapmap-salmon-v" version "-checkout"))
                    (sha256
                     (base32
                      "1yc12yqsz6f0r8sg1qnk57xg34aqwc9jbqq6gd5ys28xw3plj98p"))))
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("spdlog-for-salmon" ,spdlog-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data.  Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
12256
;; Python library for working with .loom files, fetched from PyPI.
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "loompy" version))
       (sha256
        (base32 "1drgv8j1hxqzzpnfg272x9djb6j8qr798w1pc2x8ikmfgyd9gh51"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f))                    ; there are no tests
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-typing" ,python-typing)))
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations.  Loom also supports sparse
graphs.  This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (home-page "https://github.com/linnarsson-lab/loompy")
    (license license:bsd-3)))
12284
;; Pinned to a fixed commit: the latest upstream commit cannot be used
;; because it requires Java 9.
(define-public java-forester
  (let ((revision "1")
        (commit "86b07efe302d5094b42deed9260f719a4c4ac2e6"))
    (package
      (name "java-forester")
      (version (string-append "0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Remove the bundled jars and the pre-built classes ...
             (delete-file-recursively "forester/java/resources")
             (delete-file-recursively "forester/java/classes")
             (for-each delete-file (find-files "forester/java/" "\\.jar$"))
             ;; ... as well as the bundled applications.
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; The Java sources live in a sub-directory of the checkout.
           (add-after 'unpack 'chdir
             (lambda _
               (chdir "forester/java")
               #t))
           ;; Rewrite build.xml through SXML, dropping every <unjar> element
           ;; so that no classes get repacked.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (let ((tree (with-input-from-file "build.xml"
                             (lambda _
                               (xml->sxml #:trim-whitespace? #t)))))
                 (call-with-output-file "build.xml.new"
                   (lambda (out)
                     (sxml->xml
                      (pre-post-order
                       tree
                       `((unjar     . ,(lambda _ '()))
                         (*default* . ,(lambda (tag . children)
                                         (cons tag children)))
                         (*text*    . ,(lambda (_ text) text))))
                      out))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.  Until then the PDF exporter is removed
           ;; and its call site throws instead.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; Upstream's build file has no install target.
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research.  It includes support for
reading, writing, and exporting phylogenetic trees.")
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (license license:lgpl2.1+))))
12360
;; Release 1.005 of forester, built from the sources jar published on Maven
;; Central.  Two resource files missing from that archive are fetched
;; separately as native inputs.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://search.maven.org/remotecontent?"
                           "filepath=org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32 "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml with all
         ;; <unjar> elements dropped (to avoid repacking classes), then put
         ;; the two external resource files next to it and fix up the
         ;; references to them.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((tree (with-input-from-file "src/build.xml"
                           (lambda _
                             (xml->sxml #:trim-whitespace? #t)))))
               (call-with-output-file "build.xml"
                 (lambda (out)
                   (sxml->xml
                    (pre-post-order
                     tree
                     `((unjar     . ,(lambda _ '()))
                       (*default* . ,(lambda (tag . children)
                                       (cons tag children)))
                       (*text*    . ,(lambda (_ text) text))))
                    out))))
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.  The exporter class is deleted; at its
         ;; call sites a throw statement is inserted and the remainder of
         ;; the original statement is wrapped in a /* ... */ comment.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _
             (delete-file-recursively "src/classes")
             #t))
         ;; The build file has no install target.
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32 "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32 "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research.  It includes support for
reading, writing, and exporting phylogenetic trees.")
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (license license:lgpl2.1+)))
12455
;; Core module of BioJava, built with ant from the biojava-core
;; sub-directory of the upstream git checkout.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32 "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude (list "**/SearchIOTest.java"
                            "**/BlastXMLParserTest.java"
                            "**/GenbankCookbookTest.java"
                            "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (home-page "http://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data.  It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures.  The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
12511
;; The phylo module of BioJava.  Source, version and license are inherited
;; from java-biojava-core; only the biojava-phylo sub-directory is built.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (home-page "http://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12547
;; The alignment module of BioJava.  Source, version and license are
;; inherited from java-biojava-core; only the biojava-alignment
;; sub-directory is built.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (home-page "http://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12591
;; Variant of java-biojava-core at version 4.0.0; everything except the
;; version and the source checkout is inherited.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32 "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
12605
;; The phylo module of BioJava, built from the 4.0 sources inherited from
;; java-biojava-core-4.0 and using the 4.0 core and forester 1.005.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (home-page "http://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12641
;; The alignment module of BioJava, built from the 4.0 sources inherited
;; from java-biojava-core-4.0 and using the 4.0 core and phylo modules and
;; forester 1.005.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (home-page "http://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12685
;; Drop-seq tools, built with ant from the upstream zip archive after its
;; bundled jars and third-party directory have been removed.  Dependency
;; jars are symlinked rather than copied, both for the build and in the
;; installed output.
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32 "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Keep the directories of all inputs whose label starts with
             ;; "java-", except for java-testng.
             (let ((java-dirs
                    (filter-map (match-lambda
                                  ((label . directory)
                                   (and (string-prefix? "java-" label)
                                        (not (string=? label "java-testng"))
                                        directory)))
                                inputs)))
               (for-each (lambda (jar)
                           (symlink jar
                                    (string-append "jar/lib/" (basename jar))))
                         (append-map (lambda (directory)
                                       (find-files directory "\\.jar$"))
                                     java-dirs)))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (bin      (string-append out "/bin"))
                    (java-dir (string-append out "/share/java/"))
                    (lib-dir  (string-append java-dir "/lib/"))
                    (tools    (list "BAMTagHistogram"
                                    "BAMTagofTagCounts"
                                    "BaseDistributionAtReadPosition"
                                    "CollapseBarcodesInPlace"
                                    "CollapseTagWithContext"
                                    "ConvertToRefFlat"
                                    "CreateIntervalsFiles"
                                    "DetectBeadSynthesisErrors"
                                    "DigitalExpression"
                                    "Drop-seq_alignment.sh"
                                    "FilterBAM"
                                    "FilterBAMByTag"
                                    "GatherGeneGCLength"
                                    "GatherMolecularBarcodeDistributionByGene"
                                    "GatherReadQualityMetrics"
                                    "PolyATrimmer"
                                    "ReduceGTF"
                                    "SelectCellsByNumTranscripts"
                                    "SingleCellRnaSeqMetricsCollector"
                                    "TagBamWithReadSequenceExtended"
                                    "TagReadWithGeneExon"
                                    "TagReadWithInterval"
                                    "TrimStartingSequence"
                                    "ValidateReference")))
               (for-each mkdir-p (list bin java-dir lib-dir))
               (install-file "dist/dropseq.jar" java-dir)
               (for-each (lambda (tool)
                           (chmod tool #o555)
                           (install-file tool bin))
                         tools)
               ;; In the installed scripts, use the absolute file name of
               ;; "java" and point jar_deploy_dir at the installed jars.
               (substitute* (map (lambda (tool)
                                   (string-append bin "/" tool))
                                 tools)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" java-dir "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (java-dir (string-append out "/share/java/"))
                    (lib-dir  (string-append java-dir "/lib/")))
               ;; Re-create each link from jar/lib in the output, pointing
               ;; at the same target.
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib-dir (basename jar))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once.  This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12826
;; PiGX RNAseq pipeline, built from the upstream release tarball with the
;; GNU build system.
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32 "16gla23rmziimqan7w494q0nr7vfbp42zzkrl9fracmr4k7b1kzr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         ;; Edit the list of tests in Makefile.in accordingly.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("gzip" ,gzip)
       ("snakemake" ,snakemake-4)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments.  It is easy to use and produces high
quality reports.  The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment.  In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (license license:gpl3+)))
12894
;; PiGX ChIPseq pipeline, built from the upstream release tarball with the
;; GNU build system; its test suite is not run.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32 "19a7dclqq0b4kqg3phiz4d4arlwfp34nm3z0rf1gkqdpsy7gghp3"))))
    (build-system gnu-build-system)
    (arguments
     ;; Parts of the tests rely on access to the network.
     `(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake-4)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments.  It is easy to use and
produces high quality reports.  The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment.  In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (license license:gpl3+)))
12964
;; PiGx BSseq, the bisulfite sequencing pipeline of the PiGx collection.
;; It is built from the upstream release tarball with the GNU build system.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/BIMSBbioinfo/pigx_bsseq/releases/download/v"
             version "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32 "0l97wvkq4diq8lcarraj33bby1zzf0w804jwi8mlc5qddp8idwhy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The readr package is picky about timezones, so TZ and TZDIR are
         ;; exported right before the 'check phase; TZDIR points into the
         ;; "tzdata" input.
         (add-before 'check 'set-timezone
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo)
               #t))))))
    ;; tzdata is looked up by the 'set-timezone phase above.
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake-4)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
13023
;; PiGx scRNAseq, the single-cell RNA sequencing pipeline of the PiGx
;; collection.  The configure script is told where the Picard and Drop-seq
;; jar files live via the PICARDJAR and DROPSEQJAR variables.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/BIMSBbioinfo/pigx_scrnaseq/releases/download/v"
             version "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32 "131zarirv16w8653m0d66jgjnwqfsxqc0hix0rypssz4d83bl51j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Both jar locations are derived from the corresponding inputs.
       (let ((picard  (assoc-ref %build-inputs "java-picard"))
             (dropseq (assoc-ref %build-inputs "dropseq-tools")))
         (list (string-append "PICARDJAR=" picard "/share/java/picard.jar")
               (string-append "DROPSEQJAR=" dropseq
                              "/share/java/dropseq.jar")))))
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("dropseq-tools" ,dropseq-tools)
       ("fastqc" ,fastqc)
       ("java-picard" ,java-picard-2.10.3) ; same as for dropseq
       ("java" ,icedtea-8)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc-1)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc-with-pandoc-1)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake-4)
       ("star" ,star)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
13094
;; Umbrella package for the PiGx pipelines: it takes the four individual
;; pipeline packages (BSseq, ChIPseq, RNAseq, scRNAseq) as inputs.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/BIMSBbioinfo/pigx/releases/download/v"
             version "/pigx-" version ".tar.gz"))
       (sha256
        (base32 "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python)
       ;; The individual pipelines.
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
13130
;; CRAN package "diversitree": comparative phylogenetic methods.  It is
;; given a Fortran compiler as a native input and FFTW and GSL as inputs.
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-10")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "0gh4rcrp0an3jh8915i1fsxlgyfk7njywgbd5ln5r2jhr085kpz7"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs `(("fftw" ,fftw) ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ;; The label used to be misspelled "r-suplex"; it now matches the
       ;; name of the package it refers to.
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\". Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))
13161
(define-public sjcount
  ;; Upstream provides neither a tag nor a release archive for version 3.2,
  ;; so the package is pinned to a specific commit.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (let ((samtools (assoc-ref %build-inputs "samtools")))
           (list (string-append "SAMTOOLS_DIR=" samtools "/lib/")))
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, patch the makefile:
           ;; point the samtools include flag at the input's header
           ;; directory and add -lpthread after -lz.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((samtools-include
                      (string-append "-I" (assoc-ref inputs "samtools")
                                     "/include/samtools")))
                 (substitute* "makefile"
                   (("-I \\$\\{SAMTOOLS_DIR\\}") samtools-include)
                   (("-lz ") "-lz -lpthread "))
                 #t)))
           ;; Copy the three built programs into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           '("j_count" "b_count" "sjcount"))
                 #t))))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
13212
;; minimap2 has no configure script, no test suite and no install target;
;; the build is driven entirely through make flags chosen per target system.
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "080w9066irkbhbyr4nmf19pzkdd2s4v31hpzlajgq2y0drr6zcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (list "CC=gcc"
             ;; The second flag depends on the system we build for (the
             ;; cross-compilation target if there is one).
             (let ((system ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" system)
                 "all")
                ((or (string-prefix? "armhf" system)
                     (string-prefix? "aarch64" system))
                 "arm_neon=1")
                ;; This must be 'else': '_' is not a catch-all in 'cond',
                ;; it would be evaluated as an ordinary expression and
                ;; fail on every system not matched above.
                (else "sse2only=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable and its manual page by hand.
         ;; 'install-file' creates the target directories as needed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (install-file "minimap2" bin)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
13271
;; ciRcus, an R package for circRNA data, fetched from the upstream Git
;; repository at the tag "v" followed by the version.
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
13313
(define-public r-loomr
  ;; The source is a Git snapshot pinned to COMMIT; the package version is
  ;; derived from the base version, REVISION and COMMIT.
  (let ((commit "df0144bd2bbceca6fadef9edc1bbc5ca672d4739")
        (revision "1"))
    (package
      (name "r-loomr")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mojaveazure/loomR.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1b1g4dlmfdyhn56bz1mkh9ymirri43wiz7rjhs7py3y7bdw1s3yr"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-r6" ,r-r6)
         ("r-hdf5r" ,r-hdf5r)
         ("r-iterators" ,r-iterators)
         ("r-itertools" ,r-itertools)
         ("r-matrix" ,r-matrix)))
      (home-page "https://github.com/mojaveazure/loomR")
      (synopsis "R interface for loom files")
      (description "This package provides an R interface to access, create,
and modify loom files. loomR aims to be completely compatible with loompy.")
      (license license:gpl3))))
13342
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; The makefile is pointed at a "gclib" directory via GCLDIR;
           ;; populate it from the "gclib-source" input.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; Only VERSION and COMMIT are needed for this origin; the
          ;; previously bound but unused 'revision' has been removed.
          ,(let ((version "0.10.3")
                 (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib.git")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
13403
(define-public find-circ
  ;; The last release dates from 2015, but the license was only clarified in
  ;; 2017, which is why the latest commit is packaged instead.
  (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d")
        (revision "1"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Install each script into bin and wrap it so that the
           ;; PYTHONPATH seen at build time is prefixed to the run-time one.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                      (python-path (getenv "PYTHONPATH"))
                      (wrapper-spec
                       (list "PYTHONPATH" ":" 'prefix (list python-path))))
                 (for-each (lambda (script)
                             (install-file script bin)
                             (wrap-program (string-append bin "/" script)
                               wrapper-spec))
                           '("cmp_bed.py"
                             "find_circ.py"
                             "maxlength.py"
                             "merge_bed.py"
                             "unmapped2anchors.py")))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
13455
;; Scanpy, fetched from PyPI and built with the Python build system.
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "1ak7bxms5a0yvf65prppq2g38clkv7c7jnjbnfpkh3xxv7q512jz"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-igraph" ,python-igraph)
       ("python-numba" ,python-numba)
       ("python-joblib" ,python-joblib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-statsmodels" ,python-statsmodels)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-h5py" ,python-h5py)
       ("python-tables" ,python-tables)))
    ;; Use the HTTPS URL of the project page.
    (home-page "https://github.com/theislab/scanpy")
    ;; A synopsis is not a sentence, so it carries no trailing period.
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
13491
(define-public gffcompare
  ;; The source is a Git snapshot pinned to COMMIT; the package version is
  ;; derived from the base version, REVISION and COMMIT.
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place the gclib sources in "../gclib", next to the build
           ;; directory, before building.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           ;; Install the single executable by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; 'let*' is required here: with a plain 'let' the VERSION
          ;; initializer would see the outer gffcompare COMMIT and REVISION
          ;; instead of the gclib ones bound just above it, and the file
          ;; name would carry the wrong commit ID.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib