gnu: Add libusbmuxd.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;;
16 ;;; This file is part of GNU Guix.
17 ;;;
18 ;;; GNU Guix is free software; you can redistribute it and/or modify it
19 ;;; under the terms of the GNU General Public License as published by
20 ;;; the Free Software Foundation; either version 3 of the License, or (at
21 ;;; your option) any later version.
22 ;;;
23 ;;; GNU Guix is distributed in the hope that it will be useful, but
24 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 ;;; GNU General Public License for more details.
27 ;;;
28 ;;; You should have received a copy of the GNU General Public License
29 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
30
31 (define-module (gnu packages bioinformatics)
32 #:use-module ((guix licenses) #:prefix license:)
33 #:use-module (guix packages)
34 #:use-module (guix utils)
35 #:use-module (guix download)
36 #:use-module (guix git-download)
37 #:use-module (guix hg-download)
38 #:use-module (guix build-system ant)
39 #:use-module (guix build-system gnu)
40 #:use-module (guix build-system cmake)
41 #:use-module (guix build-system haskell)
42 #:use-module (guix build-system ocaml)
43 #:use-module (guix build-system perl)
44 #:use-module (guix build-system python)
45 #:use-module (guix build-system r)
46 #:use-module (guix build-system ruby)
47 #:use-module (guix build-system scons)
48 #:use-module (guix build-system trivial)
49 #:use-module (gnu packages)
50 #:use-module (gnu packages autotools)
51 #:use-module (gnu packages algebra)
52 #:use-module (gnu packages base)
53 #:use-module (gnu packages bash)
54 #:use-module (gnu packages bison)
55 #:use-module (gnu packages bioconductor)
56 #:use-module (gnu packages boost)
57 #:use-module (gnu packages check)
58 #:use-module (gnu packages compression)
59 #:use-module (gnu packages cpio)
60 #:use-module (gnu packages cran)
61 #:use-module (gnu packages curl)
62 #:use-module (gnu packages documentation)
63 #:use-module (gnu packages databases)
64 #:use-module (gnu packages datastructures)
65 #:use-module (gnu packages file)
66 #:use-module (gnu packages flex)
67 #:use-module (gnu packages gawk)
68 #:use-module (gnu packages gcc)
69 #:use-module (gnu packages gd)
70 #:use-module (gnu packages gtk)
71 #:use-module (gnu packages glib)
72 #:use-module (gnu packages graph)
73 #:use-module (gnu packages groff)
74 #:use-module (gnu packages guile)
75 #:use-module (gnu packages haskell)
76 #:use-module (gnu packages haskell-check)
77 #:use-module (gnu packages haskell-web)
78 #:use-module (gnu packages image)
79 #:use-module (gnu packages imagemagick)
80 #:use-module (gnu packages java)
81 #:use-module (gnu packages java-compression)
82 #:use-module (gnu packages jemalloc)
83 #:use-module (gnu packages dlang)
84 #:use-module (gnu packages linux)
85 #:use-module (gnu packages logging)
86 #:use-module (gnu packages machine-learning)
87 #:use-module (gnu packages man)
88 #:use-module (gnu packages maths)
89 #:use-module (gnu packages mpi)
90 #:use-module (gnu packages ncurses)
91 #:use-module (gnu packages ocaml)
92 #:use-module (gnu packages pcre)
93 #:use-module (gnu packages parallel)
94 #:use-module (gnu packages pdf)
95 #:use-module (gnu packages perl)
96 #:use-module (gnu packages perl-check)
97 #:use-module (gnu packages pkg-config)
98 #:use-module (gnu packages popt)
99 #:use-module (gnu packages protobuf)
100 #:use-module (gnu packages python)
101 #:use-module (gnu packages python-compression)
102 #:use-module (gnu packages python-web)
103 #:use-module (gnu packages python-xyz)
104 #:use-module (gnu packages readline)
105 #:use-module (gnu packages ruby)
106 #:use-module (gnu packages serialization)
107 #:use-module (gnu packages shells)
108 #:use-module (gnu packages statistics)
109 #:use-module (gnu packages swig)
110 #:use-module (gnu packages tbb)
111 #:use-module (gnu packages tex)
112 #:use-module (gnu packages texinfo)
113 #:use-module (gnu packages textutils)
114 #:use-module (gnu packages time)
115 #:use-module (gnu packages tls)
116 #:use-module (gnu packages vim)
117 #:use-module (gnu packages web)
118 #:use-module (gnu packages xml)
119 #:use-module (gnu packages xorg)
120 #:use-module (srfi srfi-1)
121 #:use-module (ice-9 match))
122
;; ARAGORN is shipped as a single C file plus a man page, so the package
;; bypasses the usual configure/make machinery and drives gcc by hand.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Compile the versioned source file straight into the "aragorn"
         ;; executable.
         (replace 'build
           (lambda _
             (let ((c-file (string-append "aragorn" ,version ".c")))
               (invoke "gcc" "-O3" "-ffast-math" "-finline-functions"
                       "-o" "aragorn" c-file))
             #t))
         ;; Copy the executable and its man page into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (install-file "aragorn" (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1")))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
167
;; BamM: a C library wrapped in Python 2.  The C part needs its autotools
;; files regenerated, and the test suite is run against the installed copy.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM.git")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts. Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         ;; Freeze the build-time PATH into the "bamm" launcher.
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; Put the installed library on PYTHONPATH.  The site-packages
             ;; directory used to be derived by slicing fixed character
             ;; positions out of the Python store path, which only gives
             ;; "2.7" for five-character versions such as "2.7.9"; let the
             ;; build system helper compute it instead.
             (add-installed-pythonpath inputs outputs)
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (invoke "nosetests")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)    ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
262
;; BamTools: C++ library and command-line tools for BAM files.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Pass an rpath pointing at the output's lib/bamtools directory to
         ;; the linker through LDFLAGS.
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib/bamtools")))
               (setenv "LDFLAGS" (string-append "-Wl,-rpath=" libdir)))
             #t)))))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
296
;; BCFtools, built with GSL support and with the bundled htslib copy removed
;; from the source tree.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (arguments
     `(#:test-target "test"
       #:configure-flags '("--enable-libgsl")
       #:phases
       (modify-phases %standard-phases
         ;; The test driver hard-codes /bin/bash; point it at the bash found
         ;; on PATH instead.
         (add-before 'check 'patch-tests
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
340
;; BEDOPS, built together with the patched third-party libraries it bundles.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2. We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         '("jansson-2.6.tar.bz2"
                           "zlib-1.2.7.tar.bz2"
                           "bzip2-1.0.6.tar.bz2")))
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
400
;; Current bedtools release; the pinned variants below inherit from it.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.27.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "1ndg5yknrxl4djx8ddzgk12rrbiidfpmkkg5z3f95jzryfxarhn8"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The configure phase is dropped; the install prefix is handed to
         ;; make through #:make-flags instead.
         (delete 'configure))
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
435
;; Ribotaper cannot cope with the extra columns that later bedtools releases
;; write, so this older version is kept around for it.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install by copying every file found under the build tree's "bin"
         ;; directory into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))))
462
;; Pinned bedtools release used by pybedtools; only the version and source
;; differ from the main bedtools package.
(define-public bedtools-2.26
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0jhavwifnf7lmkb11h9y7dynr8d699h0rd2l52j1pfgircr2zwv5"))))))
476
;; RiboTaper pipeline; its shell entry points are wrapped so that they see
;; the R library path that was in effect at build time.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Set R_LIBS_SITE in each installed script to the value seen
         ;; during the build.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                   (r-libs (getenv "R_LIBS_SITE")))
               (for-each
                (lambda (program)
                  (wrap-program (string-append bindir program)
                    `("R_LIBS_SITE" ":" = (,r-libs))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             #t)))))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
521
;; RiboDiff (Python 2); setup.py is patched so that the TE.py script gets
;; installed as an executable.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ratschlab/RiboDiff.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper: right after
         ;; the "packages=" line of setup.py, add a "scripts=" line with the
         ;; same leading text.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole lead)
                (string-append whole "\n"
                               lead "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
566
;; Bioawk: awk variant with bioinformatics extensions.  Upstream has neither
;; a configure script nor an install target that this package uses.
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("bison" ,bison)))
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Install the binary, and the stock awk man page under the name
         ;; "bioawk.1".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (man1 (string-append prefix "/share/man/man1")))
               (mkdir-p man1)
               (copy-file "awk.1" (string-append man1 "/bioawk.1"))
               (install-file "bioawk" (string-append prefix "/bin")))
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
610
;; pybedtools for Python 3, with the tests that cannot run in the build
;; environment renamed so that the test runner does not collect them.
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.7.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pybedtools" version))
       (sha256
        (base32 "0l2b2wrnj85azfqgr0zwr60f7j58vlla1hcgxvr9rwikpl8j72ji"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("kentutils" ,kentutils)         ; for bedGraphToBigWig
       ("python-six" ,python-six)))
    (propagated-inputs
     ;; Tests don't pass with Bedtools 2.27.1.
     ;; See https://github.com/daler/pybedtools/issues/260
     `(("bedtools" ,bedtools-2.26)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; See https://github.com/daler/pybedtools/issues/261
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Requires internet access.
             (substitute* "pybedtools/test/test_helpers.py"
               (("def test_chromsizes")
                "def _do_not_test_chromsizes"))
             ;; This test (pybedtools.test.test_scripts.test_venn_mpl) needs a
             ;; graphical environment.
             (substitute* "pybedtools/test/test_scripts.py"
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             ;; FIXME: these two fail for no good reason.
             (substitute* "pybedtools/test/test1.py"
               (("def test_issue_157")
                "def _do_not_test_issue_157")
               (("def test_to_dataframe")
                "def _do_not_test_to_dataframe"))
             #t)))))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from with
Python.")
    (license license:gpl2+)))
667
;; Python 2 variant of pybedtools.  It reuses the Python 3 phases but swaps
;; in its own 'check phase, which runs nosetests on a copy of the test
;; directory with the freshly built library on PYTHONPATH.
(define-public python2-pybedtools
  (let ((pkg (package-with-python2 python-pybedtools)))
    (package
      (inherit pkg)
      (arguments
       `(#:modules ((ice-9 ftw)
                    (srfi srfi-1)
                    (srfi srfi-26)
                    (guix build utils)
                    (guix build python-build-system))
         ;; See https://github.com/daler/pybedtools/issues/192
         ,@(substitute-keyword-arguments (package-arguments pkg)
             ((#:phases phases)
              `(modify-phases ,phases
                 (replace 'check
                   (lambda _
                     ;; Locate the "lib*" directory below ./build and put it
                     ;; at the front of PYTHONPATH.
                     (let* ((build (string-append (getcwd) "/build"))
                            (libdir (find (cut string-prefix? "lib" <>)
                                          (scandir build))))
                       (setenv "PYTHONPATH"
                               (string-append build "/" libdir
                                              ":" (getenv "PYTHONPATH"))))
                     ;; The tests need to be run from elsewhere...
                     (mkdir-p "/tmp/test")
                     (copy-recursively "pybedtools/test" "/tmp/test")
                     (with-directory-excursion "/tmp/test"
                       (invoke "nosetests"
                               ;; This test fails for unknown reasons
                               "--exclude=.*test_getting_example_beds"))
                     #t))))))))))
697
;; BIOM format utilities for Python 3.  A separate Python 2 variant is
;; registered through the python2-variant property.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-cython
           (lambda _
             (setenv "USE_CYTHON" "1")
             #t))
         ;; Put a skip decorator, at the same indentation, on the line
         ;; before the definition of test_invalid_hdf5.
         (add-after 'unpack 'disable-broken-test
           (lambda _
             (substitute* "biom/tests/test_cli/test_validate_table.py"
               (("^(.+)def test_invalid_hdf5" definition lead)
                (string-append lead
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               definition)))
             #t))
         ;; Give every installed file whose name matches "\.gz" mode 644
         ;; before the 'reset-gzip-timestamps phase runs.
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (archives (find-files out "\\.gz")))
               (for-each (lambda (archive)
                           (chmod archive #o644))
                         archives)
               #t))))))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    (properties `((python2-variant . ,(delay python2-biom-format))))))
756
;; Python 2 variant of python-biom-format: same package, plus one extra phase
;; that neutralizes the pyqi requirement in setup.py.
(define-public python2-biom-format
  (let* ((python3-pkg (strip-python2-variant python-biom-format))
         (base (package-with-python2 python3-pkg)))
    (package
      (inherit base)
      (arguments
       (substitute-keyword-arguments (package-arguments base)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             ;; Do not require the unmaintained pyqi library.
             (add-after 'unpack 'remove-pyqi
               (lambda _
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                 #t)))))))))
771
;; Minimal BioPerl.  The names of all transitive target inputs are computed
;; on the host side so that the build side can look up their store paths and
;; put them on PERL5LIB for the installed scripts.
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         (closure-names
          (map (compose package-name cadr)
               (delete-duplicates
                (concatenate
                 (map (compose package-transitive-target-inputs cadr)
                      perl-inputs))))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             ;; Tags use dashes where the version has dots.
                             (string-map (lambda (c)
                                           (if (char=? c #\.)
                                               #\- c)) version)
                             ".tar.gz"))
         (sha256
          (base32 "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (inputs perl-inputs)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime. As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (scripts (find-files (string-append out "/bin/")
                                           "\\.pl$"))
                      (input-dirs (map (lambda (input-name)
                                         (assoc-ref %build-inputs input-name))
                                       ',closure-names))
                      (perl5lib (string-join
                                 (cons (string-append out
                                                      "/lib/perl5/site_perl")
                                       input-dirs)
                                 ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           scripts)
                 #t))))))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
836
;; Biopython for Python 3.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
867
;; Python 2 variant, derived mechanically from python-biopython.
(define-public python2-biopython
  (package-with-python2
   python-biopython))
870
;; fastalite: small FASTA/FASTQ parser, packaged from PyPI without tests.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "fastalite" version))
       (sha256
        (base32 "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    ;; Test data is not distributed.
    (arguments
     `(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
890
;; Python 2 variant, derived from python-fastalite with package-with-python2.
(define-public python2-fastalite (package-with-python2 python-fastalite))
893
(define-public bpp-core
  ;; Upstream's last release dates from 2014, and cloning the git repository
  ;; is the recommended way to install from source, so a fixed commit is
  ;; packaged here.  See
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-core")
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f))
      ;; bpp-phyl fails to compile with GCC 4.9, so every bpp package is
      ;; compiled with GCC 5.
      (inputs
       `(("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
926
(define-public bpp-phyl
  ;; Upstream's last release dates from 2014, and cloning the git repository
  ;; is the recommended way to install from source, so a fixed commit is
  ;; packaged here.  See
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-phyl")
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; The tests fail in an out-of-source build because the test data
         ;; is not copied into the build directory.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ;; A more modern GCC is used because GCC 4.8 stops with an
         ;; 'internal compiler error'.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
963
(define-public bpp-popgen
  ;; Upstream's last release dates from 2014, and cloning the git repository
  ;; is the recommended way to install from source, so a fixed commit is
  ;; packaged here.  See
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-popgen")
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; This package has no test suite.
         #:tests? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
996
(define-public bpp-seq
  ;; Upstream's last release dates from 2014, and cloning the git repository
  ;; is the recommended way to install from source, so a fixed commit is
  ;; packaged here.  See
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let* ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
         (short-commit (string-take commit 7)))
    (package
      (name "bpp-seq")
      (version (string-append "2.2.0-1." short-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; The tests fail in an out-of-source build because the test data
         ;; is not copied into the build directory.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ;; GCC 5 is used here as well, following 'bpp-core'.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
1030
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ;; Each input needs its own label: this entry used to be labelled
         ;; "bpp-phyl" as well, which duplicated the label of the entry above.
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1069
(define-public blast+
  (package
    (name "blast+")
    (version "2.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32 "15n937pw5aqmyfjb6l387d18grqbb96l63d5xj4l7yyh0zbf2405"))
       (patches (search-patches "blast+-fix-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copies of bzip2, zlib and pcre.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; The msbuild directory is of no use here; delete it as well.
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; This massive library comes with just two(!) tests, and both of them
       ;; fail with "unparsable timing stats".
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; At some point the configure phase needs $HOME to be set.
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the command named TOOL: a
             ;; deterministic invocation for "date", otherwise whatever
             ;; 'which' reports.  When 'which' finds nothing, print a
             ;; warning and return #f.
             (define (resolve-tool tool)
               (if (string=? tool "date")
                   "date -d @0"
                   (let ((found (which tool)))
                     (unless found
                       (format (current-error-port)
                               "WARNING: Unable to find absolute path for ~s~%"
                               tool))
                     found)))

             ;; Files in which hardcoded /usr/bin/ and /bin/ tool paths get
             ;; rewritten.
             (define files-with-hardcoded-paths
               (append '("src/build-system/configure.ac"
                         "src/build-system/configure"
                         "src/build-system/helpers/run_with_lock.c"
                         "scripts/common/impl/if_diff.sh"
                         "scripts/common/impl/run_with_lock.sh"
                         "src/build-system/Makefile.configurables.real"
                         "src/build-system/Makefile.in.top"
                         "src/build-system/Makefile.meta.gmake=no"
                         "src/build-system/Makefile.meta.in"
                         "src/build-system/Makefile.meta_l"
                         "src/build-system/Makefile.meta_p"
                         "src/build-system/Makefile.meta_r"
                         "src/build-system/Makefile.mk.in"
                         "src/build-system/Makefile.requirements"
                         "src/build-system/Makefile.rules_with_autodep.in")
                       (find-files "scripts/common/check" "\\.sh$")))

             ;; Keep the original text whenever no replacement is available.
             (substitute* files-with-hardcoded-paths
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole prefix tool)
                (or (resolve-tool tool) whole)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S")
                (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; The check script refers to "/var/tmp"; use "/tmp" instead.
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; Keep these scripts from resetting PATH.
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Build a "--KEY=VALUE" command-line option.
             (define (option key value)
               (string-append "--" key "=" value))

             (let ((out (assoc-ref outputs "out"))
                   (lib (string-append (assoc-ref outputs "lib") "/lib"))
                   (include (string-append (assoc-ref outputs "include")
                                           "/include/ncbi-tools++")))
               ;; Options such as '--enable-fast-install' are not recognized
               ;; by the 'configure' script, so it is invoked by hand.
               (invoke "./configure.orig"
                       (option "with-build-root"
                               (string-append (getcwd) "/build"))
                       (option "prefix" out)
                       (option "libdir" lib)
                       (option "includedir" include)
                       (option "with-bz2" (assoc-ref inputs "bzip2"))
                       (option "with-z" (assoc-ref inputs "zlib"))
                       (option "with-pcre" (assoc-ref inputs "pcre"))
                       ;; By default every library is built twice: once
                       ;; with "-static" in its name and once without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ; 21 MB
               "lib"                    ; 226 MB
               "include"))              ; 33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; The bulk of the sources is in the public domain.  The exceptions are:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1220
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               ;; Nothing is unquoted in this snippet, so a plain quote is
               ;; enough.
               '(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib,
                  ;; and the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (for-each delete-file-recursively
                            '("boost"
                              "pigz"
                              "google-sparsehash"
                              "zlib"
                              ".git"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz sources are deleted by the source snippet, so
         ;; the Makefile must not try to build them.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Point bless at the kmc binary installed in this package's own
         ;; output and at the pigz binary from the "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Copy the two executables into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1306
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Use fixed values for BUILD_HOST and BUILD_TIME so that the
           ;; build is deterministic.
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "allall"
               "WITH_TBB=1"
               (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the Perl test script against the executables in the build
         ;; directory.
         (replace 'check
           (lambda args
             (invoke "perl"
                     "scripts/test/simple_tests.pl"
                     "--bowtie2=./bowtie2"
                     "--bowtie2-build=./bowtie2-build")
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1365
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32 "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled copies of SeqAn and samtools.
           (for-each delete-file-recursively
                     '("src/SeqAn-1.4.2"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Strip the references to the bundled samtools from the
             ;; Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Use the samtools executable found by 'which' in place of the
             ;; bundled program's name.
             (let ((samtools (which "samtools")))
               (substitute* '("src/common.cpp"
                              "src/tophat.py")
                 (("samtools_0.1.18") samtools)))
             ;; Include the bam.h and sam.h headers from the "samtools/"
             ;; directory.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan-1)))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; The Boost Software License, Version 1.0 applies to TopHat; see
    ;; https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1431
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32 "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Install the executable, the README and the manual page by hand;
         ;; each entry pairs a file with its directory below the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (entry)
                  (install-file (car entry)
                                (string-append out (cdr entry))))
                '(("bwa" . "/bin")
                  ("README.md" . "/share/doc/bwa")
                  ("bwa.1" . "/share/man/man1")))
               #t)))
         ;; There is no "configure" script.
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    ;; The code uses non-portable SSE instructions, so it only builds on
    ;; x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1480
;; BWA-PSSM reuses the package definition of bwa via 'inherit'; only the
;; fields listed below are overridden.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1510
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32 "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
    (build-system python-build-system)
    (arguments
     ;; The test data is not included, which makes the tests fail.
     (list #:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("python-lzo" ,python-lzo)
       ("python-nose" ,python-nose)
       ("python-cython" ,python-cython)))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1539
;; Python 2 variant, derived from python-bx-python with package-with-python2.
(define-public python2-bx-python (package-with-python2 python-bx-python))
1542
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.15.1")
    (source
     (origin
       (method git-fetch)
       ;; The PyPi tarball lacks the test data, hence the git checkout.
       (uri (git-reference
             (url "https://github.com/pysam-developers/pysam.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled htslib.
           ;; TODO: Also remove samtools and bcftools.
           (delete-file-recursively "htslib")
           #t))))
    (build-system python-build-system)
    (arguments
     '(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         ;; Export the environment variables that select an external htslib
         ;; (taken from the "htslib" input), plus the ncurses-related
         ;; linker and compiler flags.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR"
                       (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1")
               #t)))
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The tests in this file need a connection to the internet.
             (delete-file "tests/tabix_test.py")
             ;; FIXME: This test fails
             (delete-file "tests/AlignmentFile_test.py")
             ;; Extend PYTHONPATH with the first entry of the "build"
             ;; directory whose name does not start with a dot.
             (let ((build-subdir
                    (car (scandir "build"
                                  (lambda (entry)
                                    (not (string-prefix? "." entry)))))))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (getcwd) "/build/"
                                      build-subdir)))
             ;; Leave the source directory so that python does not import
             ;; from the current working directory.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               (invoke "make" "-C" "pysam_data")
               (invoke "make" "-C" "cbcf_data")
               ;; nosetests crashes unless a single process is requested
               ;; explicitly, and the tests are not designed to run in
               ;; parallel, so several processes fail as well.

               ;; FIXME: tests keep timing out on some systems.
               (invoke "nosetests" "-v" "--processes" "1")))))))
    (propagated-inputs
     ;; htslib is included from installed header files.
     `(("htslib" ,htslib)))
    (inputs
     `(("ncurses" ,ncurses)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; The remaining dependencies are only needed for the tests.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1625
;; Python 2 variant, derived from python-pysam with package-with-python2.
(define-public python2-pysam (package-with-python2 python-pysam))
1628
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    (arguments
     ;; No tests are included.
     (list #:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
1653
;; Python 2 variant, derived from python-twobitreader with
;; package-with-python2.
(define-public python2-twobitreader
  (package-with-python2 python-twobitreader))
1656
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    (arguments
     ;; Not all of the test files are included, so the tests are disabled.
     '(#:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
1689
;; Python 2 variant, derived from python-plastid with package-with-python2.
(define-public python2-plastid (package-with-python2 python-plastid))
1692
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; The executables get copied straight into PREFIX, so PREFIX is set
       ;; to the output's bin directory.
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "PREFIX=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Get rid of sources of non-determinism.
         (add-after 'unpack 'be-timeless
           (lambda args
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The target directory is not created by the "install" target, so
         ;; create it beforehand.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))
               #t))))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; According to the manual: "It can be copied under the GNU General
    ;; Public License version 2 (GPLv2)."
    (license license:gpl2)))
1741
(define-public clipper
  (package
    (name "clipper")
    (version "1.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/YeoLab/clipper.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0fja1rj84wp9vpj8rxpj3n8zqzcqq454m904yp9as1w4phccirjb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop a setup dependency that is not needed.
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:phases
       (modify-phases %standard-phases
         ;; Upstream fixed this in commit
         ;; f6c2990198f906bf97730d95695b4bd5a6d01ddb.
         (add-after 'unpack 'fix-typo
           (lambda args
             (substitute* "clipper/src/readsToWiggle.pyx"
               (("^sc.*") ""))
             #t)))))
    (inputs
     `(("htseq" ,python2-htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    ;; All of the native inputs are needed for the tests.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
1793
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32
         "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the manual, the QuarryFiles data directory and the two
         ;; programs into the output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix
                                           "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" docdir)
               (copy-recursively "QuarryFiles"
                                 (string-append prefix "/QuarryFiles"))
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("CodingQuarry"
                           "CufflinksGTF_to_CodingQuarryGFF3.py")))
             #t)))))
    (native-inputs
     `(("python" ,python-2)))           ; Only Python 2 is supported
    (inputs
     `(("openmpi" ,openmpi)))
    ;; QUARRY_PATH is searched for the "QuarryFiles" directory that the
    ;; install phase places at the top of the output.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
1834
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Copy the "src" tree and the "couger" launcher into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir bindir)
               ;; Patch the launcher so that the output directory, which
               ;; holds "src", is on Python's module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         ;; Wrap 'couger' so that it runs with the PYTHONPATH that is set
         ;; in the build environment.
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (python-path (getenv "PYTHONPATH")))
               (wrap-program (string-append prefix "/bin/couger")
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
1905
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are published on the project's own web site.
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
1928
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.9")
    (source
     (origin
       (method url-fetch)
       ;; The PyPI project name is capitalized differently from the package.
       (uri (pypi-uri "CrossMap" version))
       (sha256
        (base32
         "1byhclrqnqpvc1rqkfh4jwj6yhn0x9y7jk47i0qcjlhk0pjkw92p"))))
    (build-system python-build-system)
    ;; Build with the Python 2 interpreter.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-bx-python" ,python2-bx-python)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1956
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "dnaio" version))
       (sha256
        (base32
         "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
    (build-system python-build-system)
    ;; Build-time and test-time tools only.
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    ;; xopen is a run-time requirement of the dnaio library (it is declared
    ;; in upstream's install requirements), so it has to be propagated to
    ;; users of this package instead of being a native input.
    (propagated-inputs
     `(("python-xopen" ,python-xopen)))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
1979
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.18")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marcelm/cutadapt.git")
             ;; Release tags carry a "v" prefix.
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "08bbfwyc0kvcd95jf2s95xiv9s3cbsxm39ydl0qck3fw3cviwxpg"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    (inputs
     `(("python-dnaio" ,python-dnaio)
       ("python-xopen" ,python-xopen)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
2006
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig.git")
             (commit version)))
       ;; Use the shared helper like the other git checkouts in this file;
       ;; it yields the same "NAME-VERSION-checkout" file name.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0h2smg24v5srdcqzrmz2g23cmlp4va465mgx8r2z571sfz8pv454"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f                      ; tests require access to the web
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for tests
       ("python" ,python-2)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
2043
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.12")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "00w4kfnm2c5l7wdwr2nj1z5djv8kzgf7h1zhsgv6njff1rwr26g0"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled copy of the libBigWig sources.
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Put "BigWig" at the front of the "libs" list in setup.py so that
         ;; the extension is linked with the libbigwig input.
         (add-after 'unpack 'link-with-libBigWig
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
2081
;; Python 2 variant of python-pybigwig.
(define-public python2-pybigwig
  (package-with-python2 python-pybigwig))
2084
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source
     (origin
       ;; Fetch from GitHub so that the tests are included.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/jeetsukumaran/DendroPy.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
    (build-system python-build-system)
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
2108
;; Python 2 variant of python-dendropy with one failing test disabled.
(define-public python2-dendropy
  (let ((python2-variant (package-with-python2 python-dendropy)))
    (package
      (inherit python2-variant)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-failing-test
             (lambda _
               ;; Rename the test so that it is no longer collected.  It
               ;; fails when the full test suite is run; see
               ;; https://github.com/jeetsukumaran/DendroPy/issues/74
               (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                 (("test_collection_comments_and_annotations")
                  "do_not_test_collection_comments_and_annotations"))
               #t)))
         ;; Keep whatever arguments the Python 2 variant already carries.
         ,@(package-arguments python2-variant))))))
2125
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       ;; Released on PyPI under the name "py2bit".
       (uri (pypi-uri "py2bit" version))
       (sha256
        (base32
         "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2144
(define-public deeptools
  (package
    (name "deeptools")
    (version "3.1.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/deepTools.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1vggnf52g6q2vifdl4cyi7s2fnfqq0ky2zrkj5zv2qfzsc3p3siw"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The 'reset-gzip-timestamps phase fails for this package and is
         ;; not needed, so drop it.
         (delete 'reset-gzip-timestamps))))
    (native-inputs
     ;; These three are used by the test suite only.
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-pytz" ,python-pytz)))
    (inputs
     `(("python-plotly" ,python-plotly)
       ("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)))
    (home-page "https://github.com/deeptools/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2189
(define-public delly
  (package
    (name "delly")
    (version "0.7.9")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dellytools/delly.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "034jqsxswy9gqdh2zkgc1js99qkv75ks4xvzgmh0284sraagv61z"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the "src/htslib" directory from the checkout; htslib
           ;; is provided as an input instead.
           (delete-file-recursively "src/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags
       (list "PARALLEL=1"      ; Allow parallel execution at run-time.
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Ship the "excludeTemplates" directory under share/delly.
         (add-after 'install 'install-templates
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((template-dir
                    (string-append (assoc-ref outputs "out")
                                   "/share/delly/templates")))
               (mkdir-p template-dir)
               (copy-recursively "excludeTemplates" template-dir)
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2236
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.22")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bbuchfink/diamond.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0bky78v79g3wmdpsd706cscckgw1v09fg8vdd0z8z0d5b97aj9zl"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Strip "-march=native" from the CMake file.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
2270
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/maaskola/discrover.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; Append "#include <random>" right after the include guard of two
         ;; headers.
         (add-after 'unpack 'add-missing-includes
           (lambda _
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (native-inputs
     `(("texlive" ,texlive)
       ;; TODO: Use a minimal texlive-union in place of the full texlive.
       ;; ("texlive" ,(texlive-union (list texlive-latex-doi
       ;;                                  texlive-latex-hyperref
       ;;                                  texlive-latex-oberdiek
       ;;                                  texlive-generic-ifxetex
       ;;                                  texlive-latex-url
       ;;                                  texlive-latex-pgf
       ;;                                  texlive-latex-examplep
       ;;                                  texlive-latex-natbib
       ;;                                  texlive-latex-verbatimbox
       ;;                                  texlive-latex-ms
       ;;                                  texlive-latex-xcolor
       ;;                                  texlive-fonts-amsfonts
       ;;                                  texlive-latex-amsfonts
       ;;                                  ;; ...
       ;;                                  )))
       ("imagemagick" ,imagemagick)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
2326
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DReichLab/EIG.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Throw away the pre-built binaries but keep an empty "bin"
           ;; directory around.
           (delete-file-recursively "bin")
           (mkdir "bin")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; Nothing needs configuring; the Makefile lives in a
         ;; sub-directory, so just enter it.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; Upstream's install target merely copies the executables to the
         ;; "bin" directory in the build root, so copy them from there to
         ;; the output.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "../bin" ".*"))
               #t))))))
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("perl" ,perl)
       ("gfortran" ,gfortran "lib")))
    (home-page "https://github.com/DReichLab/EIG")
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    ;; The eigensoft tools themselves are under the Expat license, but
    ;; because they link with the GNU Scientific Library (GSL) the
    ;; effective license is the GPL.
    (license license:gpl3+)))
2385
(define-public edirect
  (package
    (name "edirect")
    (version "10.2.20181018")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                           "/versions/" version
                           "/edirect-" version ".tar.gz"))
       (sha256
        (base32
         "091f4aigzpbqih6h82nq566gkp3y07i72yqndmqskfgar1vwgci7"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; The stock check phase is dropped; a simple check is run after
         ;; installation instead (see the last phase).
         (delete 'check)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (install-file "edirect.pl" bindir))
             #t))
         ;; Wrap 'edirect.pl' with the build environment's PERL5LIB so that
         ;; it finds all perl inputs at runtime.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB")))
               (wrap-program (string-append prefix "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,perl5lib))))
             #t))
         ;; Run the installed, wrapped program once as a check.
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((program (string-append (assoc-ref outputs "out")
                                           "/bin/edirect.pl")))
               (invoke program "-filter" "-help"))
             #t)))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl-xml-simple" ,perl-xml-simple)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2456
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     ;; Building in parallel fails on some machines.
     `(#:parallel-build? #f))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description used to read "using a many alignment models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2486
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://bio.math.berkeley.edu/eXpress/downloads/express-"
         version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Switch Boost and protobuf to their shared libraries and point
         ;; the bamtools include and library directories at the bamtools
         ;; input.
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so"))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2531
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dparks1134/ExpressBetaDiversity.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Build from within the "source" sub-directory.
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; Run the freshly built program with its "-u" option as the check.
         (replace 'check
           (lambda _
             (invoke "../bin/ExpressBetaDiversity" "-u")
             #t))
         ;; Install the converter script and the program itself.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("../scripts/convertToEBD.py"
                           "../bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2569
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source
     (origin
       (method url-fetch)
       ;; The source is a single C file, not an archive.
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32
         "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is nothing to unpack or configure: SOURCE is compiled
         ;; directly from the store.
         (delete 'unpack)
         (delete 'configure)
         ;; Compile the same file twice: once as the plain "FastTree"
         ;; program and once with OpenMP enabled as "FastTreeMP".
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             (invoke "gcc"
                     "-O3"
                     "-finline-functions"
                     "-funroll-loops"
                     "-Wall"
                     "-o"
                     "FastTree"
                     source
                     "-lm")
             (invoke "gcc"
                     "-DOPENMP"
                     "-fopenmp"
                     "-O3"
                     "-finline-functions"
                     "-funroll-loops"
                     "-Wall"
                     "-o"
                     "FastTreeMP"
                     source
                     "-lm")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    ;; The description used to read "up to a million of sequences".
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2625
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       ;; Release tarball attached to the GitHub release of this version.
       (uri
        (string-append
         "https://github.com/agordon/fastx_toolkit/releases/download/"
         version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
2657
(define-public flexbar
  ;; Barcode/adapter trimmer built with CMake.  Both the 'check' and the
  ;; 'install' phases are replaced with hand-written ones below.
  (package
    (name "flexbar")
    (version "3.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/seqan/flexbar.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
    (build-system cmake-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Prepend the current (build) directory to PATH before running
             ;; the test script that lives in the source tree.
             (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
             (with-directory-excursion "../source/test"
               (invoke "bash" "flexbar_test.sh"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Only the "flexbar" executable is installed.
             (install-file "flexbar"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (home-page "https://github.com/seqan/flexbar")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:bsd-3)))
2703
(define-public fraggenescan
  ;; Gene predictor for short, error-prone reads.  There is no configure
  ;; script and no usable 'make install' or 'make check', so the relevant
  ;; phases are deleted or replaced below.
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; SHARE ends with a slash, so the components appended to
                    ;; it below must not start with one.
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to the external tools by absolute file name.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _
             (invoke "make" "clean")
             (invoke "make" "fgs")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share))
             #t))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run two of the examples with the
           ;; installed script and check that the output files of the first
           ;; run get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               ;; Test complete genome.
               (invoke frag
                       "-genome=./example/NC_000913.fna"
                       "-out=./test2"
                       "-complete=1"
                       "-train=complete")
               (unless (and (file-exists? "test2.faa")
                            (file-exists? "test2.ffn")
                            (file-exists? "test2.gff")
                            (file-exists? "test2.out"))
                 (error "Expected files do not exist."))
               ;; Test incomplete sequences.
               (invoke frag
                       "-genome=./example/NC_000913-fgs.ffn"
                       "-out=out"
                       "-complete=0"
                       "-train=454_30")
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2793
(define-public fxtract
  ;; UTIL-COMMIT pins the revision of the separate "util" repository whose
  ;; checkout is copied into the source tree before the build.
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract.git")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Replace the "util" directory of the checkout with the
               ;; contents of the separately fetched "ctskennerton-util"
               ;; input.  'rmdir' only succeeds on an empty directory.
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             ;; The checkout is named after the input label above.
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2858
(define-public gemma
  ;; GEMMA is built straight from its Makefile (no configure script); the
  ;; make flags are selected per system and the single resulting binary is
  ;; installed by hand.
  (package
    (name "gemma")
    (version "0.98")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/xiangzhou/GEMMA.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1s3ncnbn45r2hh1cvrqky1kbqq6546biypr4f5mkw1kqlrgyh0yg"))))
    (inputs
     `(("eigen" ,eigen)
       ("gfortran" ,gfortran "lib")
       ("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       '(,@(match (%current-system)
             ("x86_64-linux"
              '("FORCE_DYNAMIC=1"))
             ("i686-linux"
              '("FORCE_DYNAMIC=1" "FORCE_32BIT=1"))
             (_
              '("FORCE_DYNAMIC=1" "NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'find-eigen
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ensure that Eigen headers can be found.  'getenv' returns #f
             ;; when the variable is unset, which 'string-append' cannot
             ;; handle, so only prepend the existing value when there is one.
             (let ((eigen-include (string-append (assoc-ref inputs "eigen")
                                                 "/include/eigen3"))
                   (current (getenv "CPLUS_INCLUDE_PATH")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (if (and current (not (string-null? current)))
                           (string-append current ":" eigen-include)
                           eigen-include)))
             #t))
         (add-before 'build 'bin-mkdir
           (lambda _
             ;; Create the "bin" directory from which 'install' later takes
             ;; the executable.
             (mkdir-p "bin")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (install-file "bin/gemma"
                             (string-append out "/bin")))
             #t)))
       #:tests? #f)) ; no tests included yet
    (home-page "https://github.com/xiangzhou/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (license license:gpl3)))
2920
(define-public grit
  ;; Python 2 package; the bundled Cython-generated C files are removed so
  ;; that they are regenerated during the build.
  (package
    (name "grit")
    (version "2.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/grit.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda _
             ;; Delete these C files to force fresh generation from pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             ;; Rewrite the Cython module name referenced by setup.py.
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build"))
             #t)))))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2964
(define-public hisat
  ;; HISAT is built directly from its Makefile (target "allall"); there is
  ;; no configure script, no test target, and no install target.
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               ;; 'substitute*' hands each line to the regexp together with
               ;; its trailing newline, and ".*" matches that newline too.
               ;; The replacement must therefore end the line itself,
               ;; otherwise the next Makefile line is glued onto this one.
               (("^CC = .*$") "CC = gcc\n")
               (("^CPP = .*$") "CPP = g++\n")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install every file whose name matches the hisat program
             ;; naming scheme into bin/.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
3031
(define-public hisat2
  ;; Like hisat, HISAT2 is built from its Makefile without a configure
  ;; script; programs and the HTML manual are installed by hand.
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
                           "/downloads/hisat2-" version "-source.zip"))
       (sha256
        (base32 "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (arguments
     `(#:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))       ; provides 'cut', used in 'install'
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:tests? #f                      ; no check target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Replace the `date` invocation in the Makefile with a
             ;; constant.
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (doc (string-append out "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bin) programs)
               ;; 'install-file' creates the target directory when needed.
               (install-file "doc/manual.inc.html" doc))
             #t)))))
    (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later. The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
3089
(define-public hmmer
  ;; Plain GNU build with no custom phases; Perl is needed only at build
  ;; time.
  (package
    (name "hmmer")
    (version "3.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/hmmer/hmmer-"
                           version ".tar.gz"))
       (sha256
        (base32 "171bivy6xhgjsz5nv53n81pc3frnwz29ylblawk2bv46szwjjqd5"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    ;; hmmer uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (home-page "http://hmmer.org/")
    (license license:bsd-3)))
3115
(define-public htseq
  ;; Fetched from PyPI and built with the Python build system; Cython is a
  ;; build-time dependency.
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32 "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (license license:gpl3+)))
3141
;; Python 2 variant of htseq, derived with 'package-with-python2'.
(define-public python2-htseq (package-with-python2 htseq))
3144
(define-public java-htsjdk
  ;; Built with the project's own Ant build file (target "all").  The
  ;; pre-built jars shipped in "lib" are removed by the source snippet.
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Internet access
       #:jdk ,icedtea-8
       #:make-flags
       ;; Point the "dist" property at the output's share/java/htsjdk/.
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3194
(define-public java-htsjdk-latest
  ;; Unlike java-htsjdk, this version deletes the upstream build.xml and the
  ;; test sources after unpacking, and passes #:jar-name to the Ant build
  ;; system.
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3237
3238 ;; This is needed for picard 2.10.3
(define-public java-htsjdk-2.10.1
  ;; Inherits every field not listed here (inputs, description, license,
  ;; ...) from java-htsjdk-latest.
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32 "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "htsjdk.jar"
       #:jdk ,icedtea-8
       #:tests? #f                      ; tests require Scala
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
3264
3265 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
3266 ;; recent version of java-htsjdk, which depends on gradle.
(define-public java-picard
  ;; Built with the upstream Ant build file, which is patched (in the source
  ;; snippet and in two phases) to use the packaged java-htsjdk instead of a
  ;; bundled copy.
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" line)
              (string-append line "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Redirect references to the bundled htsjdk "lib" directory to
             ;; the jar directory of the java-htsjdk input.
             (let ((htsjdk-jars (string-append (assoc-ref inputs "java-htsjdk")
                                               "/share/java/htsjdk/")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib") htsjdk-jars)))
             #t))
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             ;; Drop "compile" from the dependencies of the "test" target.
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3344
3345 ;; This is needed for dropseq-tools
(define-public java-picard-2.10.3
  ;; The upstream build.xml is deleted after unpacking; the build.xml that
  ;; exists by the time of the 'build' phase is then rewritten so that the
  ;; manifest gains a Class-Path entry pointing at htsjdk.jar.
  (package
    (name "java-picard")
    (version "2.10.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "picard.jar"
       #:main-class "picard.cmdline.PicardCommandLine"
       #:jdk ,icedtea-8
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             (chmod "build.xml" #o664)
             ;; Parse build.xml into SXML, extend the "manifest" target and
             ;; serialize the result to a new file that then replaces the
             ;; original.
             (call-with-output-file "build.xml.new"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `((target
                      . ,(lambda (tag . children)
                           (let ((names ((sxpath '(name *text*))
                                         (car children)))
                                 ;; FIXME: We're breaking the line
                                 ;; early with a dummy path to
                                 ;; ensure that the store reference
                                 ;; isn't broken apart and can still
                                 ;; be found by the reference
                                 ;; scanner.
                                 (entry (format #f
                                                "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                                ;; maximum line length is 70
                                                (string-tabulate (const #\b) 57)
                                                (assoc-ref inputs "java-htsjdk"))))
                             (if (member "manifest" names)
                                 ;; Append two extra tasks to the target
                                 ;; named "manifest".
                                 `(,tag ,@children
                                        (replaceregexp
                                         (@ (file "${manifest.file}")
                                            (match "\\r\\n\\r\\n")
                                            (replace "${line.separator}")))
                                        (echo
                                         (@ (message ,entry)
                                            (file "${manifest.file}")
                                            (append "true"))))
                                 `(,tag ,@children)))))
                     ;; Every other node and all text is passed through as is.
                     (*default* . ,(lambda (tag . children) `(,tag ,@children)))
                     (*text* . ,(lambda (_ txt) txt))))
                  port)))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3434
3435 ;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  ;; Inherits from java-picard.  Besides the jars, this version compiles and
  ;; installs the libIntelDeflater.so JNI library, because the pre-built
  ;; copy is deleted by the source snippet.
  (package
    (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((ant-jar (string-append (assoc-ref inputs "ant")
                                           "/lib/ant.jar")))
               (substitute* "build.xml"
                 (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar") ant-jar)))
             #t))
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             ;; Make "test" depend on "compile-tests" rather than on
             ;; "compile", and make "compile" build the sources only.
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Replace the null default of "intel_deflater_so_path" with the
             ;; location at which the library gets installed.
             (substitute* "src/java/net/sf/samtools/Defaults.java"
               (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                               (assoc-ref outputs "out")
                               "/lib/jni/libIntelDeflater.so"
                               "\")")))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one. This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (for-each mkdir-p '("lib/jni" "jdk-src"))
             ;; Unpack the JDK source tarball into "jdk-src".
             (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                     "-xf" (assoc-ref inputs "jdk-src"))
             ;; Generate the JNI header into "lib/".
             (invoke "javah" "-jni"
                     "-classpath" "classes"
                     "-d" "lib/"
                     "net.sf.samtools.util.zip.IntelDeflater")
             ;; Compile, then link the shared library into "lib/jni".
             (with-directory-excursion "src/c/inteldeflater"
               (invoke "gcc" "-I../../../lib" "-I."
                       (string-append "-I" (assoc-ref inputs "jdk")
                                      "/include/linux")
                       "-I../../../jdk-src/src/share/native/common/"
                       "-I../../../jdk-src/src/solaris/native/common/"
                       "-c" "-O3" "-fPIC" "IntelDeflater.c")
               (invoke "gcc" "-shared"
                       "-o" "../../../lib/jni/libIntelDeflater.so"
                       "IntelDeflater.o" "-lz" "-lstdc++"))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "ant" "all" make-flags)
             #t))
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni (string-append (assoc-ref outputs "out")
                                       "/lib/jni")))
               ;; 'install-file' creates the target directory when needed.
               (install-file "lib/jni/libIntelDeflater.so" jni)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant) ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
3552
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the JAR file names mentioned in build.xml so that they
         ;; name files below the corresponding package inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jbzip2 (assoc-ref inputs "java-jbzip2"))
                   (picard (assoc-ref inputs "java-picard-1.113"))
                   (jhdf5  (assoc-ref inputs "java-cisd-jhdf5")))
               (substitute* "build.xml"
                 (("jbzip2-0.9.jar")
                  (string-append jbzip2 "/share/java/jbzip2.jar"))
                 (("sam-1.103.jar")
                  (string-append picard "/share/java/sam-1.112.jar"))
                 (("cisd-jhdf5.jar")
                  (string-append jhdf5 "/share/java/sis-jhdf5.jar"))))
             #t))
         ;; There is no installation target, so install by hand: copy the
         ;; contents of "bin" to share/fastqc, hard-code the java
         ;; executable in the launcher script, and link it into bin/.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix   (assoc-ref outputs "out"))
                    (bindir   (string-append prefix "/bin"))
                    (datadir  (string-append prefix "/share/fastqc/"))
                    (launcher (string-append datadir "/fastqc")))
               (mkdir-p bindir)
               (mkdir-p datadir)
               (copy-recursively "bin" datadir)
               (substitute* launcher
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod launcher #o555)
               (symlink launcher (string-append bindir "/fastqc"))
               #t))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
3631
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (list (string-append "BINDIR=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the directory passed as BINDIR before 'install runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
3667
(define-public htslib
  (package
    (name "htslib")
    (version "1.9")
    ;; Release tarball published on the project's GitHub releases page.
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openssl" ,openssl)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; The Expat license covers everything except the files under cram/,
    ;; which are released under the modified BSD license.
    (license (list license:expat license:bsd-3))))
3696
;; Older htslib kept only for the packages that still depend on it.  This
;; package should be removed once no packages rely upon it.
(define htslib-1.3
  (package
    (inherit htslib)
    (version "1.3.1")
    ;; Same download location as the inherited package, different version
    ;; and hash.
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
3710
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       ;; Delete generated C code.
       (snippet
        '(begin (delete-file "idr/inv_cdf.c") #t))))
    (build-system python-build-system)
    ;; Tests are disabled: the only test ("test_inv_cdf.py") exercises
    ;; features that are no longer part of this package, and it asserts
    ;; False, so it can never pass.
    (arguments `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
3746
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Each language binding is directed to its own output.
       (let ((ruby-out   (assoc-ref %outputs "ruby"))
             (python-out (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby-out)
               (string-append "--enable-python-binding=" python-out)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (let ((bash (which "bash")))
               (setenv "SHELL" bash)
               #t))))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; The combined work is published under the GPLv3 or later. Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
3800
(define-public khmer
  (package
    (name "khmer")
    (version "2.1.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "02x38d9jw2r58y8dmnj4hffy9wxv1yc1jwbvdbhby9dxndv94r9m"))
       (patches (search-patches "khmer-use-libraries.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled zlib and bzip2.  Two other bundled
           ;; libraries are deliberately kept:
           ;;
           ;; seqan is a modified subset of the old version 1.4.1, so it
           ;; is not replaced.
           ;;
           ;; MurmurHash is not replaced as the canonical repository for
           ;; this code 'SMHasher' is unsuitable for providing a
           ;; library. See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (for-each delete-file-recursively
                     '("third-party/zlib"
                       "third-party/bzip2"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Export CC=gcc for the rest of the build.
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; FIXME: This fails with "permission denied".
         (delete 'reset-gzip-timestamps))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
3857
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The build phase is run from within the "src" directory.
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p target)
               ;; Step back out of "src" and install everything found in
               ;; the top-level "bin" directory.
               (chdir "..")
               (copy-recursively "bin" target)
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
3895
(define-public macs
  (package
    (name "macs")
    (version "2.1.1.20160309")
    ;; The sources are fetched from PyPI, where the project is named MACS2.
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "MACS2" version))
              (sha256
               (base32
                "09ixspd1vcqmz1c81ih70xs4m7qml2iy5vyx1y74zww3iy1vl210"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only compatible with Python 2.7
       #:tests? #f))                    ; no test target
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    ;; The description says "transcription factor" (the correct term), not
    ;; "transcript factor".
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcription factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
3922
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile and the sources patched below live in "core".
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern has to
               ;; name the neighbouring programs exactly as the
               ;; replacement does ("f2cl"), otherwise it cannot match
               ;; and mafft-profile stays in the list.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; comment out the clean-up commands of the removed programs
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         ;; Replace bare references to perl, awk and grep with the file
         ;; names found by 'which' at build time.
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         ;; Wrap every installed file in bin/ so that the coreutils input
         ;; is prepended to PATH.
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref %build-inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
4008
(define-public mash
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled kseq.
           ;; TODO: Also delete bundled murmurhash and open bloom filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:configure-flags
       (let ((capnproto (assoc-ref %build-inputs "capnproto"))
             (gsl       (assoc-ref %build-inputs "gsl")))
         (list (string-append "--with-capnp=" capnproto)
               (string-append "--with-gsl=" gsl)))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; The bundled kseq.h is deleted by the source snippet; include
         ;; the header through the htslib/ prefix instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; capnproto 0.7 requires c++14 to build
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application. Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (supported-systems '("x86_64-linux"))
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
4074
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (let ((out   (assoc-ref %outputs "out"))
             (boost (assoc-ref %build-inputs "boost")))
         (list (string-append "PREFIX=" out)
               (string-append "BOOST_ROOT=" boost)))
       #:tests? #f                      ; Tests are run during the build phase.
       #:phases
       (modify-phases %standard-phases
         ;; Include the bam, sam and kseq headers through the samtools/
         ;; and htslib/ prefixes instead of bam/.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Point SConstruct at the htslib and samtools inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib   (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools
                                 "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
4147
(define-public minced
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bindir   (string-append (assoc-ref outputs "out") "/bin"))
                    (launcher (string-append bindir "/minced")))
               ;; Minced comes with a wrapper script that tries to figure
               ;; out where it is located before running the JAR. Since
               ;; these paths are known to us, we build our own wrapper to
               ;; avoid coreutils dependency.
               (install-file "minced.jar" bindir)
               (call-with-output-file launcher
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bindir "/minced.jar \"$@\"\n")
                    port)))
               (chmod launcher #o555)
               #t))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
4204
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    ;; The sources are fetched from PyPI, where the project is named misopy.
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32
         "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Use setuptools, or else the executables are not
             ;; installed.
             (("distutils.core") "setuptools")
             ;; use "gcc" instead of "cc" for compilation
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)))  ;for tests
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
4252
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append
             "http://www.drive5.com/muscle/muscle_src_"
             version ".tar.gz"))
       (sha256
        (base32
         "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so just test if it runs.
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         ;; Install the "muscle" executable into the output's bin/.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "muscle" bindir)
               #t))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
4287
(define-public newick-utils
  ;; There are no recent releases so we package from git.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      ;; The version embeds the first 8 characters of the commit.
      (version (string-append "1.6-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4321
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    ;; Release tarball published on the project's GitHub releases page.
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4346
(define-public pplacer
  (let ((commit "807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/matsen/pplacer.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.01
         #:findlib ,ocaml4.01-findlib
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Unpack the cddlib sources into a scratch directory and copy
           ;; their C sources and headers over the files in "cdd_src".
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((tarball (assoc-ref inputs "cddlib-src"))
                     (scratch "cddlib_guix"))
                 (mkdir scratch)
                 (with-directory-excursion scratch
                   (invoke "tar" "xvf" tarball))
                 ;; The third entry returned by 'scandir' is taken to be
                 ;; the unpacked directory -- presumably the first two are
                 ;; "." and ".."; confirm against the (ice-9 ftw) manual.
                 (let ((lib-src (string-append scratch "/"
                                               (list-ref (scandir scratch) 2)
                                               "/lib-src")))
                   (for-each make-file-writable (find-files "cdd_src" ".*"))
                   (for-each (lambda (source-file)
                               (copy-file source-file
                                          (string-append
                                           "cdd_src/"
                                           (basename source-file))))
                             (find-files lib-src ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; Install by copying the "bin" directory into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (copy-recursively "bin"
                                 (string-append (assoc-ref outputs "out")
                                                "/bin"))
               #t)))))
      (native-inputs
       `(("zlib" ,zlib)
         ("gsl" ,gsl)
         ("ocaml-ounit" ,ocaml4.01-ounit)
         ("ocaml-batteries" ,ocaml4.01-batteries)
         ("ocaml-camlzip" ,ocaml4.01-camlzip)
         ("ocaml-csv" ,ocaml4.01-csv)
         ("ocaml-sqlite3" ,ocaml4.01-sqlite3)
         ("ocaml-xmlm" ,ocaml4.01-xmlm)
         ("ocaml-mcl" ,ocaml4.01-mcl)
         ("ocaml-gsl" ,ocaml4.01-gsl)
         ("cddlib-src" ,(package-source cddlib))))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (home-page "http://matsen.fhcrc.org/pplacer")
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (license license:gpl3))))
4431
;; Companion package to 'pplacer'.  It is kept separate so that the Python
;; scripts shipped next to the OCaml binaries can be built with the
;; python-build-system.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Work from the "scripts" sub-directory of the checkout.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts") #t))
         ;; Run the test suite through unittest discovery.
         (replace 'check
           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
         ;; Prefix PATH of the two scripts with the "bin" directories of the
         ;; "hmmer" (and, for refpkg_align.py, "infernal") inputs.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                    (hmmer-bin (string-append (assoc-ref inputs "hmmer")
                                              "/bin"))
                    (infernal-bin (string-append (assoc-ref inputs "infernal")
                                                 "/bin")))
               (wrap-program (string-append bin "/refpkg_align.py")
                 `("PATH" ":" prefix
                   (,(string-append hmmer-bin ":" infernal-bin))))
               (wrap-program (string-append bin "/hrefpkg_query.py")
                 `("PATH" ":" prefix (,hmmer-bin))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
4470
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (license license:bsd-3)))
4498
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       ;; The release is published on PyPI as a zip archive, hence the
       ;; explicit extension (and the "unzip" native input below).
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; requires Python 2.7
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
4533
(define-public pbtranscript-tofu
  ;; The version string embeds the first seven characters of this commit.
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         ;; Drop the Cython tarball bundled with the sources.
         (snippet
          '(begin
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; The setup.py hack below breaks the build with setuptools 18.0
           ;; and later, so turn its condition into a constant false.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (license license:bsd-3))))
4590
(define-public prank
  (package
    (name "prank")
    (version "150803")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32 "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Both this phase and 'enter-src-dir are inserted right after
         ;; 'unpack; the Makefile is addressed as "src/Makefile", i.e. from
         ;; the top of the source tree.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         ;; Install the binary and its man page by hand, then wrap the binary
         ;; so that the "bin" directories of mafft, exonerate and bppsuite
         ;; are prefixed to PATH.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (mandir (string-append prefix "/share/man/man1"))
                    (tools-path
                     (string-append
                      (assoc-ref %build-inputs "mafft") "/bin:"
                      (assoc-ref %build-inputs "exonerate") "/bin:"
                      (assoc-ref %build-inputs "bppsuite") "/bin")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,tools-path)))
               (install-file "prank.1" mandir))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
4648
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "5.16b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
             version "_src.tar.gz"))
       (sha256
        (base32 "1wl0dawpssqwfjvr651r4wlww8hhjin8nba6xh71ks7sbypx886j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; No configure script exists; point the Makefile's INSTALLDIR at
         ;; the "bin" directory of the output instead.
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         ;; 'make install' does not create its target directory.
         (add-before 'install 'make-install-directory
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         ;; Wrap the main script with the PATH in effect during the build.
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((build-path (getenv "PATH"))
                    (script (string-append (assoc-ref outputs "out")
                                           "/bin/proteinortho5.pl")))
               (wrap-program script
                 `("PATH" ":" prefix (,build-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
4700
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the first paragraph must match the number of @item
    ;; entries in the enumeration below (currently seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
4739
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; Have the Makefile install straight into the output's "bin".
       #:make-flags
       (list (string-append "INSTALLDIR=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (home-page "http://prodigal.ornl.gov")
    (license license:gpl3+)))
4770
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly, with the in-tree "bin" and "lib" directories put in
             ;; front of PATH and PERL5LIB.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))
             #t))
         (replace 'install
           ;; There is no 'install' target in the Makefile, so copy the
           ;; scripts and the Perl modules by hand.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Every script whose name does not end in "R" gets the
               ;; build-time PERL5LIB and PATH, extended with the installed
               ;; module and "bin" directories.  'find-files' runs on the
               ;; source tree's "bin", so PROG is of the form "bin/NAME".
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script needs R's site library and coreutils
               ;; instead.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
4887
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable by hand and expose it under the shorter
         ;; name "raxml" as well.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; The link itself must be created inside BIN; a bare "raxml"
               ;; would be created in the build directory and never reach
               ;; the output.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
4928
(define-public rsem
  (package
    (name "rsem")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM.git")
             (commit (string-append "v" version))))
       (sha256
        (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
       (file-name (git-file-name name version))
       (modules '((guix build utils)))
       ;; Remove the bundled copies of boost and samtools.
       (snippet
        '(begin
           (delete-file-recursively "boost")
           (delete-file-recursively "samtools-1.3")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Point the Makefile at the boost headers and at the htslib header
       ;; and static library taken from the inputs.
       #:make-flags
       (let ((boost (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "BOOST=" boost "/include/")
               (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
               (string-append "SAMLIBS=" htslib "/lib/libhts.a")))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               (("^\\$\\(SAMLIBS\\).*") ""))
             #t))
         ;; Copy every file matching "rsem-.*" into "bin" and the Perl helper
         ;; module into the site_perl directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (perldir (string-append prefix "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         ;; Let the listed programs find the installed Perl module.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (programs '("rsem-calculate-expression"
                                "rsem-control-fdr"
                                "rsem-generate-data-matrix"
                                "rsem-generate-ngvector"
                                "rsem-plot-transcript-wiggles"
                                "rsem-prepare-reference"
                                "rsem-run-ebseq"
                                "rsem-run-prsem-testing-procedure")))
               (for-each
                (lambda (program)
                  (wrap-program (string-append prefix "/bin/" program)
                    `("PERL5LIB" ":" prefix
                      (,(string-append prefix "/lib/perl5/site_perl")))))
                programs))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("htslib" ,htslib-1.3)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
5017
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled pysam ...
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; ... stop depending on the outdated "distribute" module ...
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; ... and tell setup.py that pysam is already available.
             (("^have_pysam = False") "have_pysam = True"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (home-page "http://rseqc.sourceforge.net/")
    (license license:gpl3+)))
5060
(define-public seek
  ;; There are no release tarballs.  According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source (origin
                (method hg-fetch)
                (uri (hg-reference
                      (url "https://bitbucket.org/libsleipnir/sleipnir")
                      (changeset changeset)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of "tools/" that are built and installed in
         ;; addition to the default targets.
         (let ((dirs '("SeekMiner"
                       "SeekEvaluator"
                       "SeekPrep"
                       "Distancer"
                       "Data2DB"
                       "PCL2Bin")))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 (substitute* "gen_tools_am"
                   ;; 'substitute*' hands the regexp each line with its
                   ;; trailing newline, which ".*" consumes as well, so the
                   ;; replacement has to put the newline back.
                   (("/usr/bin/env.*")
                    (string-append (which "perl") "\n")))
                 (invoke "bash" "gen_auto")
                 #t))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               (apply invoke "make" make-flags)))
                           dirs)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               ;; Same command line as before: the flags
                               ;; first, then the "install" target.
                               (apply invoke "make"
                                      (append make-flags '("install")))))
                           dirs)
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
5133
(define-public samtools
  (package
    (name "samtools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
       (modules '((guix build utils)))
       ;; Delete bundled htslib.
       (snippet
        '(begin
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         ;; The test script calls out to /bin/bash; substitute the bash
         ;; found on PATH.
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         ;; Also install the static library ...
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out") "/lib")))
               (install-file "libbam.a" libdir)
               #t)))
         ;; ... and every header found at the top of the build tree.
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((incdir (string-append (assoc-ref outputs "out")
                                          "/include/samtools/"))
                   (header? (lambda (name) (string-match "\\.h$" name))))
               (for-each (lambda (header)
                           (install-file header incdir))
                         (scandir "." header?))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5195
(define-public samtools-0.1
  ;; Latest release of the 0.1 series of samtools.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and it is still used
  ;; in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "LIBCURSES=-lncurses")
       ;; Reuse the arguments of 'samtools', adjusting only the phases.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:phases parent-phases)
            `(modify-phases ,parent-phases
               ;; Install only the "samtools" binary, by hand.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let ((bindir (string-append (assoc-ref outputs "out")
                                                "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t)))
               (delete 'patch-tests)
               (delete 'configure))))))))
5226
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; There are no release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32 "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, move into "src" for the build.
           (replace 'configure
             (lambda _ (chdir "src") #t))
           ;; Copy the sibling "bin" directory into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      (home-page "https://github.com/wanpinglee/MOSAIK")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5274
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "2.9.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ngs.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "17c0v1nah3g3d2ib5bbi0vhma1ghd6vb9xycavqsh64lhp840rk3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out"))
                   (build-dir (string-append (getcwd) "/build")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so call it with only the two
               ;; options below.
               (invoke "./configure"
                       (string-append "--build-prefix=" build-dir)
                       (string-append "--prefix=" prefix))
               #t)))
         ;; The SDK lives in its own sub-directory of the checkout.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t)))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (home-page "https://github.com/ncbi/ngs")
    (license license:public-domain)))
5322
;; Java bindings of NGS.  This is NGS-SDK with the 'enter-dir' phase pointed
;; at the "ngs-java" sub-directory instead; every field not overridden here
;; is inherited from NGS-SDK.
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Prepend a #:modules entry to the inherited arguments, then rewrite
     ;; the inherited #:phases.
     (substitute-keyword-arguments
         (append '(#:modules ((guix build gnu-build-system)
                              (guix build utils)
                              (srfi srfi-1)
                              (srfi srfi-26)))
                 (package-arguments ngs-sdk))
       ((#:phases inherited-phases)
        `(modify-phases ,inherited-phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
5340
;; NCBI-VDB database engine.  Besides the regular installation, the phases
;; below also install the interface libraries ("ilib"), the interface
;; headers and two configuration files, which sra-tools needs.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.9.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ncbi/ncbi-vdb.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1l4ny67nxwv1lagk9wwjbrgm7ln7adci6dnpc7k1yaln6shj0qpm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _ (for-each make-file-writable (find-files "." ".*")) #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.  'getenv' returns
             ;; #f when PERL5LIB is unset, which 'string-append' would
             ;; reject, so only append the previous value when there is one.
             (let ((setup (string-append (getcwd) "/setup"))
                   (perl5lib (getenv "PERL5LIB")))
               (setenv "PERL5LIB"
                       (if perl5lib
                           (string-append setup ":" perl5lib)
                           setup)))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Prepend "-msse2" to the flags assigned to CFLAGS.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))
               #t)))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install interface libraries.  On i686 the interface libraries
             ;; are installed to "linux/gcc/i386", so we need to use the Linux
             ;; architecture name ("i386") instead of the target system prefix
             ;; ("i686").
             (let ((out (assoc-ref outputs "out")))
               (mkdir (string-append out "/ilib"))
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 (string-append out "/ilib"))
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir target)
               (install-file "libs/kfg/default.kfg" target)
               (install-file "libs/kfg/certs.kfg" target))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5450
;; PLINK 1.07.  Upstream has no 'configure' script and no "install" target,
;; so the build is driven by make flags and the single "plink" binary is
;; installed by hand.
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "plink" bin)
               #t))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5503
;; PLINK 1.9 ("plink-ng").  Inherits from PLINK and is built from the "1.9"
;; sub-directory of the upstream repository using "Makefile.std".
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02npdwgkpfkdnhw819rhj5kw02a5k5m90b14zq9zzya4hyg929c0"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "BLASFLAGS=-llapack -lopenblas"
             "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
             "ZLIB=-lz"
             "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (delete 'configure)            ; no "configure" script
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "plink" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
5541
;; Smithlab CPP, pinned to a specific upstream commit.  The custom 'install'
;; phase copies the object files (*.o) to "lib" and the headers (*.hpp) to
;; "include/smithlab-cpp".
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                ;; Use the same helper as the other Git checkouts in this
                ;; file; it yields NAME-VERSION-checkout.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; Refer to "sam.h" through the "samtools/" include prefix.
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (lib     (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
5594
;; preseq.  The bundled samtools copy is removed from the source; the build
;; is pointed at the samtools and smithlab-cpp inputs through make flags.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/preseq/"
                                  "releases/download/v" version
                                  "/preseq_v" version ".tar.bz2"))
              (sha256
               (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Remove bundled samtools.
                          (delete-file-recursively "samtools")
                          #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       (let ((out          (assoc-ref %outputs "out"))
             (samtools     (assoc-ref %build-inputs "samtools"))
             (smithlab-cpp (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab-cpp "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
5644
;; Screed.  The test suite needs the installed "screed" command, so the
;; standard 'check' phase is deleted and re-added after 'install'.
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Tests must be run after installation, as the "screed" command does
         ;; not exist right after building.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Let the build system compute the site-packages directory
             ;; instead of slicing the version out of the interpreter's
             ;; store file name, which only works for single-digit "X.Y.Z"
             ;; version strings.
             (add-installed-pythonpath inputs outputs)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out") "/bin:"
                                    (getenv "PATH")))
             (invoke "python" "setup.py" "test")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
5688
;; Python 2 variant of python-screed, derived with 'package-with-python2'.
(define-public python2-screed (package-with-python2 python-screed))
5691
;; SRA Toolkit.  It is built against the installed ncbi-vdb package rather
;; than against an ncbi-vdb source/build tree, which is why the 'configure'
;; phase patches "setup/konfigure.perl" before running it.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.9.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0663gcdxkziwsmlznjxysb00621rllpbz6jwsfifq7z3dj3lwm8b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       (list (string-append "DEFAULT_CRT="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/certs.kfg")
             (string-append "DEFAULT_KFG="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/default.kfg")
             ;; The library directory name is chosen on the host side from
             ;; the system being built for.
             (string-append "VDB_LIBDIR="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            ,(if (string-prefix? "x86_64"
                                                 (or (%current-target-system)
                                                     (%current-system)))
                                 "/lib64"
                                 "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.  'getenv' returns
             ;; #f when PERL5LIB is unset, which 'string-append' would
             ;; reject, so only append the previous value when there is one.
             (let ((setup (string-append (getcwd) "/setup"))
                   (perl5lib (getenv "PERL5LIB")))
               (setenv "PERL5LIB"
                       (if perl5lib
                           (string-append setup ":" perl5lib)
                           setup)))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources and
             ;; raw build output of ncbi-vdb, including files that are not
             ;; installed.  Since we are building against an installed version of
             ;; ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (invoke "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     ;; (string-append "--with-xml2-prefix="
                     ;;                (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5")))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "https://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
5790
;; SeqAn 2.x.  Nothing is compiled: the builder unpacks the release tarball
;; and copies "include" to the "out" output and "share" to the "doc" output.
(define-public seqan
  (package
    (name "seqan")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/seqan/seqan/releases/"
                                  "download/seqan-v" version
                                  "/seqan-library-" version ".tar.xz"))
              (sha256
               (base32
                "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tar-bin (string-append (assoc-ref %build-inputs "tar")
                                        "/bin"))
                (xz-bin  (string-append (assoc-ref %build-inputs "xz")
                                        "/bin"))
                (headers (string-append (assoc-ref %outputs "out")
                                        "/include"))
                (docs    (string-append (assoc-ref %outputs "doc")
                                        "/share")))
           ;; Only tar and xz are needed to unpack the archive.
           (setenv "PATH" (string-append tar-bin ":" xz-bin))
           (invoke "tar" "xvf" (assoc-ref %build-inputs "source"))
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" headers)
           (copy-recursively "share" docs)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
5835
;; SeqAn 1.x.  Same recipe as SeqAn 2.x, but the release tarball is
;; bzip2-compressed, so bzip2 replaces xz in the builder's PATH.
(define-public seqan-1
  (package
    (inherit seqan)
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tar-bin   (string-append (assoc-ref %build-inputs "tar")
                                          "/bin"))
                (bzip2-bin (string-append (assoc-ref %build-inputs "bzip2")
                                          "/bin"))
                (headers   (string-append (assoc-ref %outputs "out")
                                          "/include"))
                (docs      (string-append (assoc-ref %outputs "doc")
                                          "/share")))
           ;; Only tar and bzip2 are needed to unpack the archive.
           (setenv "PATH" (string-append tar-bin ":" bzip2-bin))
           (invoke "tar" "xvf" (assoc-ref %build-inputs "source"))
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" headers)
           (copy-recursively "share" docs)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))))
5870
;; Seqmagick, fetched from PyPI and built with the Python build system's
;; default phases.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "seqmagick" version))
              (sha256
               (base32
                "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-biopython" ,python-biopython)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
5896
;; Seqtk.  No 'configure' script and no "install" target upstream; the
;; single "seqtk" binary is installed by hand.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/seqtk.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.
           (lambda _
             (invoke "./seqtk" "seq")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "seqtk" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
5932
;; SNAP aligner.  Upstream has no 'configure' script; the test suite is the
;; "unit_tests" binary, and the two programs are installed by hand.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/amplab/snap.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check (lambda _ (invoke "./unit_tests") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; NOTE(review): zlib is presumably linked into the installed programs
    ;; (the link line is not visible in this recipe), so it is listed as a
    ;; regular input for the target system rather than as a native
    ;; (build-machine) input.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
5973
;; SortMeRNA.  The programs go to the "out" output; the FASTA files under
;; "rRNA_databases" go to the separate "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biocore/sortmerna.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin   (string-append (assoc-ref outputs "out") "/bin"))
                   (share (string-append (assoc-ref outputs "db")
                                         "/share/sortmerna/rRNA_databases")))
               ;; Programs first, then the sequence databases.
               (for-each (lambda (program)
                           (install-file program bin))
                         '("sortmerna" "indexdb_rna"))
               (for-each (lambda (database)
                           (install-file database share))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6020
;; STAR.  The source snippet removes the pre-built binaries and the bundled
;; htslib; the build phases then rewrite the Makefile and the includes so
;; that the htslib input is used instead.
(define-public star
  (package
    (name "star")
    (version "2.6.0c")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/alexdobin/STAR.git")
                    (commit version)))
              ;; Use the same helper as the other Git checkouts in this
              ;; file; it yields NAME-VERSION-checkout.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "04cj6jw8d9q6lk9c78wa4fky6jdlicf1d13plq7182h8vqiz8p59"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             ;; Replace the value of COMPILATION_TIME_PLACE with a fixed
             ;; string.
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the Depend.list target.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Include the htslib headers with angle brackets.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
6101
;; Subread, built from the "src" directory with "Makefile.Linux".  The
;; programs end up in "../bin" and are copied to the output from there.
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (let* ((system ,(or (%current-target-system)
                           (%current-system)))
              (generic '("-ggdb" "-fomit-frame-pointer"
                         "-ffast-math" "-funroll-loops"
                         "-fmessage-length=0"
                         "-O9" "-Wall" "-DMAKE_FOR_EXON"
                         "-DMAKE_STANDALONE"
                         "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
              (x86-64-only '("-mmmx" "-msse" "-msse2" "-msse3"))
              (flags (if (string-prefix? "x86_64" system)
                         (append generic x86-64-only)
                         generic)))
         (list (string-append "CCFLAGS=" (string-join flags))
               "-f" "Makefile.Linux"
               "CC=gcc ${CCFLAGS}"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
6156
;; StringTie.  The bundled samtools copy is removed from the source; the
;; Makefile and the gclib sources are patched to use the samtools input.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                  "stringtie-" version ".tar.gz"))
              (sha256
               (base32
                "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Do not make "stringtie" depend on a bundled libbam.a.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the "samtools/" prefix.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "stringtie" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
6218
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "taxtastic" version))
       (sha256
        (base32
         "03pysw79lsrvz4lwzis88j15067ffqbi4cid5pqhrlxmd6bh8rrk"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Run the tests through unittest discovery.
         (replace 'check
           (lambda _
             (invoke "python" "-m" "unittest" "discover" "-v")
             #t)))))
    ;; The labels say "python-" but every entry is the Python 2 variant,
    ;; matching the #:python argument above.
    (propagated-inputs
     `(("python-sqlalchemy" ,python2-sqlalchemy)
       ("python-decorator" ,python2-decorator)
       ("python-biopython" ,python2-biopython)
       ("python-pandas" ,python2-pandas)
       ("python-psycopg2" ,python2-psycopg2)
       ("python-fastalite" ,python2-fastalite)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-six" ,python2-six)
       ("python-jinja2" ,python2-jinja2)
       ("python-dendropy" ,python2-dendropy)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
6254
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.15")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32
         "1qw30c45wihgy632rbz4rh3njnwj4msj46l1rsgdhyg6bgypmr1i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "CFLAGS=-O2"               ; override "-m64" flag
             ;; Point both the install prefix and the man page directory
             ;; at the "out" output.
             (string-append "PREFIX=" (assoc-ref %outputs "out"))
             (string-append "MANDIR=" (assoc-ref %outputs "out")
                            "/share/man/man1"))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
6290
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32
         "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
6318
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is fetched from R-Forge.
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32
         "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
6342
(define-public r-copynumber
  (package
    (name "r-copynumber")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "copynumber" version))
       (sha256
        (base32
         "0ipwj9i5p1bwhg5d80jdjagm02krpj2v0j47qdgw41h8wncdyal3"))))
    (build-system r-build-system)
    ;; R package dependencies, propagated to users of this package.
    (propagated-inputs
     `(("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-genomicranges" ,r-genomicranges)
       ("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/copynumber")
    (synopsis "Segmentation of single- and multi-track copy number data")
    (description
     "This package segments single- and multi-track copy number data by a
penalized least squares regression method.")
    (license license:artistic2.0)))
6365
(define-public r-geneplotter
  (package
    (name "r-geneplotter")
    (version "1.60.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "geneplotter" version))
              (sha256
               (base32
                "10khr0pznxf3m0f5gzck9ymljrwcv3vamfmpskd51yjh36lhllqz"))))
    (build-system r-build-system)
    ;; R package dependencies, propagated to users of this package.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-lattice" ,r-lattice)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://bioconductor.org/packages/geneplotter")
    (synopsis "Graphics functions for genomic data")
    (description
     "This package provides functions for plotting genomic data.")
    (license license:artistic2.0)))
6390
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.64.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "0p64s1n1627yafnp25wjr4b22p34lqw574fx2qg4s1m0lffh1z6i"))))
    (build-system r-build-system)
    ;; A Fortran compiler is made available at build time only.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-survival" ,r-survival)))
    (home-page "https://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
6417
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.22.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq2" version))
              (sha256
               (base32
                "0n5ah84mxn87p45drzy0wh2yknmzj1q5i6gv0v9vgg1lj7awb91r"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
6454
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.28.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DEXSeq" version))
              (sha256
               (base32
                "0g5w9bn2nb3m670hkcsnhfvvkza2318z9irlhhwhb3n8rdzlsdym"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allows the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (license license:gpl3+)))
6497
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "13yvhf3yskmvhs8szs6rkw93h81h5xqa3h19h91pp6nprhc8s3ll"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
6527
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.58.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "1l5x2icv9di1lr3gqfi0vjnyd9xc3l77yc42ippqd4cadj3d1pzf"))))
    ;; Upstream spells the package name in upper case.
    (properties '((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-graph" ,r-graph)))
    (home-page "https://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
6548
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "110al7x0ig8plzrprvhwc7xshi1jzpj2n8llhhg2fh6v6k0k6awr"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
6575
(define-public r-category
  (package
    (name "r-category")
    (version "2.48.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "1jdm83bwdfhpfm1y6hwgvxzj6l83h1bdkqv23799kzywnwm016kv"))))
    ;; Upstream capitalizes the package name.
    (properties '((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (home-page "https://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
6606
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.48.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "0wlqqgfynwqnqhckhsfjwg9zkj6hkmzwd5y76dhqz720vy21rcln"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (home-page "https://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
6637
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "0iks123i1adkb9i2q4wvfqdmmj9dy867jvngj9757y8gj6xbcpy1"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "ShortRead")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
6679
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.16.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "0qzydz87rld2nhwzbfgrw5jfgh8maa9y54mjx9c4285m11qj2shq"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchjobs" ,r-batchjobs)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
6725
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "1ph92fv44b90v7mk4b1mjvv0dlrhl8ba01klxbnd0vs4qn9zxplh"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (license license:gpl3+)))
6752
(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
  (package
    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/annotation/src/contrib"
                           "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
    (properties
     '((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
    (build-system r-build-system)
    ;; As this package provides little more than a very large data file it
    ;; doesn't make sense to build substitutes.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
    (synopsis "Annotation package for human genome in TxDb format")
    (description
     "This package provides an annotation database of Homo sapiens genome
data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
track. The database is exposed as a @code{TxDb} object.")
    (license license:artistic2.0)))
6784
(define-public r-sparql
  (package
    (name "r-sparql")
    (version "1.16")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "SPARQL" version))
       (sha256
        (base32
         "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
    ;; Upstream spells the package name in upper case.
    (properties '((upstream-name . "SPARQL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "https://cran.r-project.org/web/packages/SPARQL")
    (synopsis "SPARQL client for R")
    (description "This package provides an interface to use SPARQL to pose
SELECT or UPDATE queries to an end-point.")
    ;; The only license indication is found in the DESCRIPTION file,
    ;; which states GPL-3. So we cannot assume GPLv3+.
    (license license:gpl3)))
6807
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/torognes/vsearch.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources. The vsearch source is adjusted
           ;; for this in the patch.
           (delete-file "src/city.h")
           (delete-file "src/citycrc.h")
           (delete-file "src/city.cc")
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
6853
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       ;; The upstream release number is spelled out here because the
       ;; package version above carries the extra "-1" suffix.
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           "1.1.5" ".tar.gz"))
       (sha256
        (base32
         "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Install only the "ParDRe" executable into out/bin.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "ParDRe" bin)
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
6894
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The tests are run through the "spec" target.
    (arguments
     '(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
6922
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    ;; RSpec is made available at build time only.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
6946
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    ;; RSpec is a test framework, so it is declared as a native input like
    ;; in the other Ruby packages of this file rather than as a regular
    ;; input.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
6970
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the hard-coded tool names and /bin paths used by the
         ;; command tests with whatever `which' resolves in the build
         ;; environment.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; The clauses are applied in this order on every line.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; No comma between the file names: this form sits inside a
             ;; quasiquote, where a comma would be read as an unquote.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
7013
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32
         "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    ;; Upstream spells the package name in mixed case.
    (properties '((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (license license:gpl2+)))
7039
;; BiocInstaller (from Bioconductor): installs and updates R packages.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.32.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocInstaller" version))
              (sha256
               (base32
                "1s1f9qhyf3mc73ir25x2zlgi9hf45a37lg4z8fbva4i21hqisgsl"))))
    (properties
     `((upstream-name . "BiocInstaller")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/BiocInstaller")
    (synopsis "Install Bioconductor packages")
    ;; "GitHub" is spelled with a capital H.
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and GitHub.")
    (license license:artistic2.0)))
7058
;; biocViews (from Bioconductor): generates HTML pages that categorize the
;; packages of a Bioconductor repository by keyword ("view").
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.50.10")
    (home-page "https://bioconductor.org/packages/biocViews")
    (source
     (origin
       (uri (bioconductor-uri "biocViews" version))
       (method url-fetch)
       (sha256
        (base32 "06ms82pyc5rxbd9crfvqjxcwpafv0c627i83v80d12925mrc51h8"))))
    (properties '((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (synopsis "Bioconductor package categorization helper")
    (description
     "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
7085
;; bookdown (from CRAN): output formats and utilities for writing books and
;; technical documents in R Markdown.
(define-public r-bookdown
  (package
    (name "r-bookdown")
    (version "0.9")
    (home-page "https://github.com/rstudio/bookdown")
    (source
     (origin
       (uri (cran-uri "bookdown" version))
       (method url-fetch)
       (sha256
        (base32 "0vg1s1w0l9pm95asqb21yf39mfk1nc9rdhmlys9xwr7p7i7rsz32"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-htmltools" ,r-htmltools)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-tinytex" ,r-tinytex)
       ("r-yaml" ,r-yaml)
       ("r-xfun" ,r-xfun)))
    (synopsis "Authoring books and technical documents with R markdown")
    (description
     "This package provides output formats and utilities for
authoring books and technical documents with R Markdown.")
    (license license:gpl3)))
7109
;; BiocStyle (from Bioconductor): standard formatting styles for
;; Bioconductor PDF and HTML documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.10.0")
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (source
     (origin
       (uri (bioconductor-uri "BiocStyle" version))
       (method url-fetch)
       (sha256
        (base32 "01lm8xljilj666fcl3wnw82dxkcxnlr294lddr553rm8xr5nwg31"))))
    (properties '((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocmanager" ,r-biocmanager)
       ("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (synopsis "Bioconductor formatting styles")
    (description
     "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
7135
;; BiocCheck (from Bioconductor): additional quality checks for R packages
;; that are to be submitted to the Bioconductor repository.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocCheck" version))
              (sha256
               (base32
                "0zamvs5jar38293ff27imvwy0ra25y64ls9z8w3q1y4jcp8p8pg7"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out        (assoc-ref outputs "out"))
                    (dest-dir   (string-append out "/bin"))
                    ;; No trailing slash here: the file names appended
                    ;; below already start with one.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each installed script into the "bin" directory.
               (for-each
                (lambda (script)
                  (symlink (string-append script-dir "/" script)
                           (string-append dest-dir "/" script)))
                '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-knitr" ,r-knitr)
       ("r-optparse" ,r-optparse)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7182
;; optparse (from CRAN): command line option parser for Rscript programs.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.6.0")
    (home-page "https://github.com/trevorld/optparse")
    (source
     (origin
       (uri (cran-uri "optparse" version))
       (method url-fetch)
       (sha256
        (base32 "1d7v5gl45x4amsfmzn5zyyffyqlc7a82h01szlnda22viyxids0h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-getopt" ,r-getopt)))
    (synopsis "Command line option parser")
    (description
     "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (license license:gpl2+)))
7205
;; DNAcopy (from Bioconductor): circular binary segmentation of DNA copy
;; number data.
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DNAcopy" version))
              (sha256
               (base32
                "04cqdqxhva66xwh1s2vffi56b9fcrqd4slcrvqasj5lp2rkjli82"))))
    (properties
     `((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    ;; The Fortran compiler is a build-time tool that runs on the build
    ;; machine, so it is declared as a native input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
7227
;; S4Vectors (from Bioconductor): the Vector and List virtual classes plus
;; a few concrete subclasses such as DataFrame, Rle and Hits.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.20.1")
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (source
     (origin
       (uri (bioconductor-uri "S4Vectors" version))
       (method url-fetch)
       (sha256
        (base32 "18whrw67nxn82xshckl2pjy7d14sa3c27h3n9naqyqwz88lr6dzg"))))
    (properties '((upstream-name . "S4Vectors")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
7254
;; seqinr (from CRAN): exploratory analysis and visualization of biological
;; sequence data.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.4-5")
    (home-page "http://seqinr.r-forge.r-project.org/")
    (source
     (origin
       (uri (cran-uri "seqinr" version))
       (method url-fetch)
       (sha256
        (base32 "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (synopsis "Biological sequences retrieval and analysis")
    (description
     "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (license license:gpl2+)))
7279
;; IRanges (from Bioconductor): S4 classes for integer ranges, RLE vectors
;; and other sequentially organized data.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.16.0")
    (home-page "https://bioconductor.org/packages/IRanges")
    (source
     (origin
       (uri (bioconductor-uri "IRanges" version))
       (method url-fetch)
       (sha256
        (base32 "0ljppsk611xi72gc8mbdx1311b63b1ijd401jz5xmxk5frla1nc1"))))
    (properties '((upstream-name . "IRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
7308
;; GenomeInfoDbData (Bioconductor annotation data): lookup tables mapping
;; NCBI taxonomy IDs to species.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "1.2.0")
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (source
     (origin
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append
             "https://bioconductor.org/packages/release/data/annotation/"
             "src/contrib/GenomeInfoDbData_" version ".tar.gz"))
       (method url-fetch)
       (sha256
        (base32 "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c"))))
    (properties '((upstream-name . "GenomeInfoDbData")))
    (build-system r-build-system)
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description
     "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
7331
;; GenomeInfoDb (from Bioconductor): translation between chromosome
;; sequence naming conventions.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.18.1")
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (source
     (origin
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (method url-fetch)
       (sha256
        (base32 "049pyzr8iszv3g7wdqf3pz7vg7bzd450c20ln6fgw4g5xnkkr10s"))))
    (properties '((upstream-name . "GenomeInfoDb")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
7359
;; edgeR (from Bioconductor): differential expression analysis of RNA-seq
;; expression profiles with biological replication.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.24.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "15yimsbsxmxhlsfmgw5j7fd8qn08zz4xqxrir1c6n2dc103y22xg"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod))) ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The last sentence previously lacked its verb ("It be applied").
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
7387
;; VariantAnnotation (from Bioconductor): annotation of genetic variants
;; and prediction of coding changes.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.28.8")
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (source
     (origin
       (uri (bioconductor-uri "VariantAnnotation" version))
       (method url-fetch)
       (sha256
        (base32 "0gf36lr9xy3zmcc4rxs5bi2ccrrc7b6wqp6p3cvnclgif4i0l66k"))))
    (properties '((upstream-name . "VariantAnnotation")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
7425
;; limma (from Bioconductor): linear models for gene expression studies.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.38.3")
    (home-page "http://bioinf.wehi.edu.au/limma")
    (source
     (origin
       (uri (bioconductor-uri "limma" version))
       (method url-fetch)
       (sha256
        (base32 "08va8jggmv61wym955mnb1n31mgikrmjys7dl1kp5hp3yia8jg7l"))))
    (build-system r-build-system)
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
7444
;; XVector (from Bioconductor): S4 classes for sequences stored behind an R
;; external pointer or on disk.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "01fph1ydd6g0rl5mcw54spx22glq2kqv7wyw8bqw0plmabzcwwdm"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the zlibbioc entries from DESCRIPTION and NAMESPACE;
         ;; zlib itself is supplied through the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    ;; Spelling fixed: "manpulation" -> "manipulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7480
;; GenomicRanges (from Bioconductor): containers for genomic intervals and
;; variables defined along a genome.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "0bgh14d15dpf2iy36qinw45r6n45rqkf0ghazrdl3jfva6vbrb29"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The first sentence was ungrammatical ("provides tools to ... is
    ;; playing a central role"); it is now a single well-formed sentence.
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, which play a central role when it comes to
analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7509
;; Biobase (from Bioconductor): base functions needed by many other
;; Bioconductor packages.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.42.0")
    (home-page "https://bioconductor.org/packages/Biobase")
    (source
     (origin
       (uri (bioconductor-uri "Biobase" version))
       (method url-fetch)
       (sha256
        (base32 "10nr6nrkj5vlq8hsgbhbhv669z0dbpz4m3vz9k32rx1czbrrqwin"))))
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
7531
;; AnnotationDbi (from Bioconductor): user interface and database
;; connection code for SQLite-backed annotation data packages.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.44.0")
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (source
     (origin
       (uri (bioconductor-uri "AnnotationDbi" version))
       (method url-fetch)
       (sha256
        (base32 "1954vimkx5yb9irppq8vssq0f3yjkg36w38b9r0rqmijx1ps7x5d"))))
    (properties '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7558
;; biomaRt (from Bioconductor): interface to databases implementing the
;; BioMart software suite.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.38.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "1lshkknp7dmr3p6dd2zbv86cc71h53ggh9ji83jcjym8sgbbspl2"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-httr" ,r-httr)
       ("r-progress" ,r-progress)
       ("r-rcurl" ,r-rcurl)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the link text second; the two
    ;; arguments were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7591
;; BiocParallel (from Bioconductor): parallel evaluation functions tailored
;; to Bioconductor objects.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.16.5")
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (source
     (origin
       (uri (bioconductor-uri "BiocParallel" version))
       (method url-fetch)
       (sha256
        (base32 "1164dk0fajb2vrkfpcjs11055qf1cs4vvbnq0aqdaaf2p4lyx41l"))))
    (properties '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
7616
;; Biostrings (from Bioconductor): string containers and matching
;; algorithms for large biological sequences.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.50.2")
    (home-page "https://bioconductor.org/packages/Biostrings")
    (source
     (origin
       (uri (bioconductor-uri "Biostrings" version))
       (method url-fetch)
       (sha256
        (base32 "16cqqc8i6gb0jcz0lizfqqxsq7g0yb0ll2s9qzmb45brp07dg8f7"))))
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
7642
;; Rsamtools (from Bioconductor): interface to the samtools, bcftools and
;; tabix utilities.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.34.0")
    (home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (source
     (origin
       (uri (bioconductor-uri "Rsamtools" version))
       (method url-fetch)
       (sha256
        (base32 "01v4bjhj2i126pwyk0v9lvmfp2ih495xsq903k3xa2z24bjxphbi"))))
    (properties '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Remove the zlibbioc entries from DESCRIPTION and NAMESPACE;
         ;; zlib itself is listed in the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
7686
;; DelayedArray (from Bioconductor): delayed or block-processed operations
;; on array-like objects.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.8.0")
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (source
     (origin
       (uri (bioconductor-uri "DelayedArray" version))
       (method url-fetch)
       (sha256
        (base32 "0cl5anqkjwvqx19snjhz0zj8cp8ibckiifl28h821h50g62nvb2f"))))
    (properties '((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)))
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
7717
;; SummarizedExperiment (from Bioconductor): container holding one or more
;; matrix-like assays, with genomic ranges as rows and samples as columns.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.12.0")
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (source
     (origin
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (method url-fetch)
       (sha256
        (base32 "07805572xhpj5mfwq6kw1ha21wgalqvhh4ydvafyl1bnf3r20vps"))))
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
7748
;; GenomicAlignments (from Bioconductor): containers for storing and
;; manipulating short genomic alignments.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.18.1")
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (source
     (origin
       (uri (bioconductor-uri "GenomicAlignments" version))
       (method url-fetch)
       (sha256
        (base32 "1maslav2r34wjyzh2nlwa862in1ir7i5xk57nw2nlfh5gqy112jd"))))
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
7781
;; rtracklayer (from Bioconductor): framework for interacting with genome
;; browsers and manipulating annotation tracks.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.42.1")
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (source
     (origin
       (uri (bioconductor-uri "rtracklayer" version))
       (method url-fetch)
       (sha256
        (base32 "1ycmcxvgvszvjv75hlmg0i6pq8i7r8720vgmfayb905s9l6j82x6"))))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Remove the zlibbioc entries from DESCRIPTION and NAMESPACE;
         ;; zlib itself is listed in the inputs below.  Note that the
         ;; DESCRIPTION pattern here starts with a space and ends with the
         ;; comma.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
7828
;; GenomicFeatures (from Bioconductor): tools for making and manipulating
;; transcript centric annotations.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.34.1")
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (source
     (origin
       (uri (bioconductor-uri "GenomicFeatures" version))
       (method url-fetch)
       (sha256
        (base32 "0slq6hv5bmc3bgrl824jzmr6db3fvaj6b7ihwmdn76pgqqbq2fq6"))))
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
7869
;; GO.db (Bioconductor annotation data): annotation maps describing the
;; Gene Ontology.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.7.0")
    (home-page "https://bioconductor.org/packages/GO.db")
    (source
     (origin
       ;; The tarball is fetched from the "data/annotation" part of the
       ;; Bioconductor release tree, so the URL is spelled out in full.
       (uri (string-append
             "https://www.bioconductor.org/packages/release/data/annotation/"
             "src/contrib/GO.db_" version ".tar.gz"))
       (method url-fetch)
       (sha256
        (base32 "0i3wcf5h3n0dawzc1hy0kv74f06j80c47n4p3g3fmrcxlhi3jpa5"))))
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
7893
;; topGO (from Bioconductor): gene ontology term testing that accounts for
;; the topology of the GO graph.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.34.0")
    (home-page "https://bioconductor.org/packages/topGO")
    (source
     (origin
       (uri (bioconductor-uri "topGO" version))
       (method url-fetch)
       (sha256
        (base32 "1j1jcd16j564kr6qz28140fzmnh9xasi84v1c1fi98sqv30zq9bh"))))
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
7926
;; BSgenome: the shared infrastructure package that the BSgenome data
;; packages in this file propagate.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.50.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "07z4zxx0khrc86qqvc7vxww8df9fh6pyks9ajxkc9gdqr5nn79j7"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (home-page "https://bioconductor.org/packages/BSgenome")
    (license license:artistic2.0)))
7956
;; Genome data package for Homo sapiens (hs37d5).  Substitutes are
;; disabled because the package is essentially one very large data file.
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.1000genomes.hs37d5_"
                           version ".tar.gz"))
       (sha256
        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (license license:artistic2.0)))
7987
;; impute from Bioconductor; gfortran is listed as an input of the build.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "08z0pj1dz5iq967nwj67qyka7ir7m5an2ggv7bsrlz3apzfsla33"))))
    (build-system r-build-system)
    (inputs
     `(("gfortran" ,gfortran)))
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (home-page "https://bioconductor.org/packages/impute")
    (license license:gpl2+)))
8007
;; seqPattern, fetched through the regular Bioconductor URI helper.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0di83qi83mrlw7i12khsq55d03hlazcywaa9m9pki1sfhafpq733"))))
    (build-system r-build-system)
    (properties '((upstream-name . "seqPattern")))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (home-page "https://bioconductor.org/packages/seqPattern")
    (license license:gpl3+)))
8034
;; genomation, fetched through the regular Bioconductor URI helper.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "0g0v4alfpqlinqinjnyzl3mrjnpbdx9ri34mcaiqbvbvg8ic8wvg"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (license license:artistic2.0)))
8080
;; Experiment data accompanying the genomation package.  Substitutes are
;; disabled because the package is mostly large data files.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/experiment/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/experiment/src/contrib/"
                           "genomationData_" version ".tar.gz"))
       (sha256
        (base32
         "10xyb8akjrhmak2i0mnv1agny2ipy364q9nlibyplpzc7vdb6bw7"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8108
;; org.Hs.eg.db annotation data package (Human).
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Hs.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1qxz9l80yg3qdqszs6dsscp7lrpfi1bgd0pxh9j7q34vprzwhdim"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Hs.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Human")
    (description
     "This package contains genome-wide annotations for Human, primarily based
on mapping using Entrez Gene identifiers.")
    (home-page "https://www.bioconductor.org/packages/org.Hs.eg.db/")
    (license license:artistic2.0)))
8134
;; org.Ce.eg.db annotation data package (Worm).
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Ce.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1w5br1ss4ha8wv4v2saj7cmbjc2jw0dyj2f2y269l078z31wcnaz"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Ce.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (home-page "https://www.bioconductor.org/packages/org.Ce.eg.db/")
    (license license:artistic2.0)))
8160
;; org.Dm.eg.db annotation data package (Fly).
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Dm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1pqjrzlyg72bjpy8zsxvaglc7jsv176bnyi87xdajmkvsgxpm7b3"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Dm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (home-page "https://www.bioconductor.org/packages/org.Dm.eg.db/")
    (license license:artistic2.0)))
8186
;; org.Mm.eg.db annotation data package (Mouse).
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Mm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1i3nvrd3wjigf1rmgxq1p5xxc3p8v02h5gwi62s30rkrsyjjfjxx"))))
    (build-system r-build-system)
    (properties '((upstream-name . "org.Mm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (home-page "https://www.bioconductor.org/packages/org.Mm.eg.db/")
    (license license:artistic2.0)))
8212
;; seqLogo, fetched through the regular Bioconductor URI helper.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqLogo" version))
       (sha256
        (base32 "022vr9ydwcivs7rw7kwj73gfk5gc7ckwa1q66vhd4kw9ylh70v68"))))
    (build-system r-build-system)
    (properties '((upstream-name . "seqLogo")))
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (home-page "https://bioconductor.org/packages/seqLogo")
    (license license:lgpl2.0+)))
8233
;; Genome data package for Homo sapiens (UCSC hg19).  Substitutes are
;; disabled because the package is essentially one very large data file.
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.UCSC.hg19_"
                           version ".tar.gz"))
       (sha256
        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (license license:artistic2.0)))
8264
;; Genome data package for Mus musculus (UCSC mm9).  Substitutes are
;; disabled because the package is essentially one very large data file.
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm9_"
                           version ".tar.gz"))
       (sha256
        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (license license:artistic2.0)))
8295
;; Genome data package for Mus musculus (UCSC mm10).  Substitutes are
;; disabled because the package is essentially one very large data file.
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm10_"
                           version ".tar.gz"))
       (sha256
        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (license license:artistic2.0)))
8327
;; TxDb annotation data package for Mouse (UCSC mm10, knownGene track).
;; Substitutes are disabled because the package is essentially one very
;; large data file.
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.4")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "01lgxc1fx5nhlpbwjd5zqghkkbmh6axd98ikx4b0spv0jdg6gf39"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (home-page
     "https://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (license license:artistic2.0)))
8362
;; Genome data package for Caenorhabditis elegans (UCSC ce6).  Substitutes
;; are disabled because the package is essentially one very large data file.
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce6_"
                           version ".tar.gz"))
       (sha256
        (base32 "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (license license:artistic2.0)))
8394
;; Genome data package for Caenorhabditis elegans (UCSC ce10).  Substitutes
;; are disabled because the package is essentially one very large data file.
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce10_"
                           version ".tar.gz"))
       (sha256
        (base32 "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (license license:artistic2.0)))
8426
;; Genome data package for Drosophila melanogaster (UCSC dm3).  Substitutes
;; are disabled because the package is essentially one very large data file.
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball is published
       ;; under "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm3_"
                           version ".tar.gz"))
       (sha256
        (base32 "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (build-system r-build-system)
    (properties
     '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    ;; Little more than a very large data file, so building substitutes
    ;; makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (license license:artistic2.0)))
8458
;; motifRG, fetched through the regular Bioconductor URI helper.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "motifRG" version))
       (sha256
        (base32 "1wxww6i0jgyapqclcwy0zzf9kqjvrvylr89z7yhg1izi7jnw2fka"))))
    (build-system r-build-system)
    (properties '((upstream-name . "motifRG")))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (home-page "https://bioconductor.org/packages/motifRG")
    (license license:artistic2.0)))
8485
;; R/qtl, distributed through CRAN.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.42-8")
    (source
     (origin
       (method url-fetch)
       ;; Use the cran-uri helper, as the other CRAN package in this file
       ;; does, instead of spelling out the mirror URL by hand.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "1l528dwvfpdlr05imrrm4rq32axp6hld9nqm6mm43kn5n7z2f5k6"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
8510
;; zlibbioc, fetched through the regular Bioconductor URI helper.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "0bjvzy24kab7ank02cc1qk2ikcz4dllgf66wpsdl0d3zp4gn3l2h"))))
    (build-system r-build-system)
    (properties '((upstream-name . "zlibbioc")))
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (license license:artistic2.0)))
8529
;; R4RNA, downloaded directly from the project's own web site.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32 "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (license license:gpl3+)))
8552
;; Rhtslib, fetched through the regular Bioconductor URI helper.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rhtslib" version))
       (sha256
        (base32 "1h4q54f8za3aaxgy186zf2165sar5c3cgxkk44lq5hzx5pxkl5wn"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Rhtslib")))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (home-page "https://github.com/nhayden/Rhtslib")
    (license license:lgpl2.0+)))
8579
;; bamsignals, fetched through the regular Bioconductor URI helper.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "bamsignals" version))
       (sha256
        (base32 "19irfx1y1izf903vq59wxsdbf88g143zy9l89gxqawh7jfxds8w8"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (home-page "https://bioconductor.org/packages/bamsignals")
    (license license:gpl2+)))
8609
;; RCAS R library, fetched through the regular Bioconductor URI helper.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32 "0ss5hcg2m7gjji6dd23zxa5bd5a7knwcnada4qs5q2l4clgk39ad"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-knitr" ,r-knitr)
       ("r-motifrg" ,r-motifrg)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("r-topgo" ,r-topgo)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8657
;; Web interface for RCAS.  After the install phase the launcher is
;; wrapped so that GUILE_LOAD_PATH, GUILE_LOAD_COMPILED_PATH and
;; R_LIBS_SITE are fixed to the values known at build time.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                           "releases/download/v" version
                           "/rcas-web-" version ".tar.gz"))
       (sha256
        (base32 "0igz7jpcf7cm9800zcag6p3gd1i649figrhbdba6cjkm8f4gfspr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile-json)
       ("guile-redis" ,guile2.2-redis)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((launcher  (string-append (assoc-ref outputs "out")
                                              "/bin/rcas-web"))
                    (json-dir  (assoc-ref inputs "guile-json"))
                    (redis-dir (assoc-ref inputs "guile-redis"))
                    ;; Colon-separated module directories of both Guile
                    ;; libraries; used for source and compiled paths alike.
                    (modules   (string-append
                                json-dir "/share/guile/site/2.2:"
                                redis-dir "/share/guile/site/2.2")))
               (wrap-program launcher
                 `("GUILE_LOAD_PATH" ":" = (,modules))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,modules))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (license license:agpl3+)))
8701
;; MutationalPatterns, fetched through the regular Bioconductor URI helper.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MutationalPatterns" version))
       (sha256
        (base32 "0w9lg1zs106h6rqvy8mhikq6q6q9syw6c1prcxr38ssh85rcih12"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The next two genome data packages are only suggested by upstream.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (license license:expat)))
8739
;; WGCNA, fetched from CRAN through the cran-uri helper.
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.66")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "WGCNA" version))
       (sha256
        (base32 "0rhnyhzfn93yp24jz9v6dzrmyizwzdw070a7idm0k33w1cm8sjqv"))))
    (build-system r-build-system)
    (properties '((upstream-name . "WGCNA")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-doparallel" ,r-doparallel)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-fastcluster" ,r-fastcluster)
       ("r-foreach" ,r-foreach)
       ("r-go-db" ,r-go-db)
       ("r-hmisc" ,r-hmisc)
       ("r-impute" ,r-impute)
       ("r-rcpp" ,r-rcpp)
       ("r-robust" ,r-robust)
       ("r-survival" ,r-survival)
       ("r-matrixstats" ,r-matrixstats)
       ("r-preprocesscore" ,r-preprocesscore)))
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (license license:gpl2+)))
8778
;; ChIPKernels is built from a pinned Git commit; the version string embeds
;; the revision and the first nine characters of that commit.
(define-public r-chipkernels
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels.git")
               (commit commit)))
         ;; Name the checkout with the shared helper, as the other
         ;; Git-fetched packages in this file do, so the source store item
         ;; carries the usual "-checkout" suffix.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8814
;; SeqGL is fetched from Git, using the version string as the commit
;; reference.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ManuSetty/SeqGL.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    ;; "DNase" is spelled the same way here as in the description below.
    (synopsis "Group lasso for DNase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8845
;; gkmSVM from CRAN; the mixed-case upstream name is kept in the properties.
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.79.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gkmSVM" version))
              (sha256
               (base32
                "04dakbgfvfalz4rm4fvvybp506dn5fbj5g86ybfhrc6wywjllsz3"))))
    (properties '((upstream-name . "gkmSVM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://cran.r-project.org/web/packages/gkmSVM")
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (license license:gpl2+)))
8878
;; tximport from Bioconductor; it declares no inputs of its own.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32 "16wp09dm0cpb4mc00nmglfb8ica7qb4a55vm8ajgzyagbpfdd44l"))))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis
     "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8900
;; rhdf5 from Bioconductor.  r-rhdf5lib is propagated, and zlib is a plain
;; input.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.26.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5" version))
              (sha256
               (base32
                "10zkw3k13wmvyif417gplyf6rwp2gpkjasw97lhwv2f9i32rry9l"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    ;; The description uses the plural "packages" after "other software".
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
packages, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8927
;; AnnotationFilter from Bioconductor; the mixed-case upstream name is kept in
;; the properties.
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32 "0wrr10cxjzmxx46vjzq2nsf6xlqz1sqwx4xm0sk3d77ff8wmph4x"))))
    (properties '((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8951
;; EMBOSS, built from the upstream tarball with the GNU build system.  Two
;; build files are patched after unpacking and the build system is then
;; regenerated with autoreconf.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; Upstream's PNGDRIVER tests look for libgd, libpng and zlib
             ;; but expect all three under one common prefix.  Replace the
             ;; check with the settings it would have produced.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; The build has no Internet connection, so checking for
             ;; updates cannot succeed.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         (add-after 'disable-update-check 'autogen
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description
     "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
9014
;; BITS is built from a pinned Git commit.  There is no configure script, the
;; CUDA parts are stripped from the Makefile, and installation copies the
;; "bin" directory into the output.
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; No release archives have been published as yet, but the version is
      ;; 2.13.0 nonetheless.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         ;; Yields NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32 "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ;; Blank out everything up to and including "_cuda".
                 ((".*_cuda") "")
                 ;; Drop the trailing line continuation after this target.
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (copy-recursively "bin" (string-append out "/bin")))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description
       "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
9060
(define-public piranha
  ;; The latest version has no release tarball, and at the time of this
  ;; writing the latest commit is more than a year old.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       '(#:test-target "test"
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools")
                              "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools")
                              "/lib/bamtools"))
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the separately fetched smithlab_cpp sources
           ;; into the directory under "src".
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((smithlab (assoc-ref inputs "smithlab-cpp")))
                 (for-each (lambda (source-file)
                             (install-file source-file "./src/smithlab_cpp/"))
                           (find-files smithlab)))
               #t))
           ;; After the regular install phase, copy whatever is in the local
           ;; "bin" directory to the output's "bin" directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program target))
                           (find-files "bin" ".*")))
               #t)))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
9127
;; PePr from PyPI, built with the Python build system using Python 2.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Let `pypi-uri' build the download URL, as the multiqc
              ;; package in this file does, rather than hard-coding the
              ;; pypi.python.org host name.
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
9156
;; filevercmp is built from a pinned Git commit.  There is no configure script
;; and no test suite; the install phase copies the "filevercmp" file into the
;; output's "bin" directory.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/filevercmp.git")
                      (commit commit)))
                ;; Name the checkout after the package version, like the
                ;; other Git-fetched packages in this file, instead of
                ;; embedding the full 40-character commit hash.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
9187
;; MultiQC from PyPI.  The matplotlib requirement in setup.py is relaxed
;; before building.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32
                "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; At runtime MultiQC checks that nose is present.
       ("python-nose" ,python-nose)))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             ;; MultiQC 1.5 ‘requires’ a python-matplotlib older than the one
             ;; Guix provides, but it should work fine with 2.2.2.  For
             ;; details see <https://github.com/ewels/MultiQC/issues/725>
             ;; and <https://github.com/ewels/MultiQC/issues/732>.
             (substitute* "setup.py"
               (("['\"]matplotlib.*?['\"]")
                "'matplotlib'"))
             #t)))))
    (home-page "http://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9233
;; chipseq from Bioconductor, built with the R build system.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "1pp1rm5fs3hlar5x4dl3a3b4gara7qwf81dbvka6r1n78hrf9x1b"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9259
;; CopyhelpeR is downloaded from the "data/experiment" area of the
;; Bioconductor release repository, so its URL is assembled by hand.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://bioconductor.org/packages/release/"
                                  "data/experiment/src/contrib/CopyhelpeR_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties '((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9285
;; CopywriteR from Bioconductor; the mixed-case upstream name is kept in the
;; properties.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.14.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "1hbiw0m9hmx4na9v502pxf8y5wvxzr68r4d3fqr2755gxx86qck6"))))
    (properties '((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9325
;; methylKit from Bioconductor.  The R dependencies are propagated, and zlib
;; is a plain input.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32 "1zcfwy7i10aqgnf7r0c41hakb5aai3s3n9y8pc6a98vimz51ly2z"))))
    (properties '((upstream-name . "methylKit")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9371
;; sva from Bioconductor, built with the R build system.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.30.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "sva" version))
              (sha256
               (base32
                "0czja4c5jxa0g3fspi90nyajqmvzb29my4ykv2wi66h43f5dlwhq"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9401
;; seqminer from CRAN; zlib is its only declared input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "7.1")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqminer" version))
              (sha256
               (base32
                "1jydcpkw4rwfp983j83kipvsvr10as9pb49zzn3c2v09k1gh3ymy"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Upstream accepts any version of the GPL.
    (license (list license:gpl2+ license:gpl3+))))
9423
;; RareMETALS2 is downloaded from a fixed location on genome.sph.umich.edu
;; and built with the R build system.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                  "b/b7/RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties '((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9455
;; MALDIquant from CRAN; it declares no inputs of its own.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.18")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "MALDIquant" version))
              (sha256
               (base32
                "18nl214xjsxkcpbg79jkmw0yznwm5szyh2qb84n7ip46mm779ha6"))))
    (properties '((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
9481
;; ProtGenerics from Bioconductor; it declares no inputs of its own.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "053mmxhzncqgigl2iqjlq56qzimlw2zzw31wpzw19rf7rld1vi3b"))))
    (properties '((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9501
;; mzR from Bioconductor.  The bundled Boost sources are deleted in a source
;; snippet and src/Makevars is patched to link against Boost libraries
;; instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzR" version))
       (sha256
        (base32 "0mlwg646k49klxrznckzfv54a9mz6irj42fqpaaa0xjm6cw2lwaa"))
       (modules '((guix build utils)))
       ;; Remove the copy of Boost shipped in the tarball.
       (snippet
        '(begin
           (delete-file-recursively "src/boost")
           #t))))
    (properties '((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             (substitute* "src/Makevars"
               ;; Drop references to the bundled Boost library sources.
               (("\\./boost/libs.*") "")
               ;; Add an ARCH_LIBS line with the Boost linker flags right
               ;; after "ARCH_OBJS=".
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nARCH_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(("boost" ,boost)   ; use this instead of the bundled boost sources
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-ncdf4" ,r-ncdf4)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9553
;; affyio from Bioconductor.  r-zlibbioc is propagated, and zlib is a plain
;; input.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "1s4zp1211vf0krxzch9v3q3r6vs8hihqppq18i2fpvwlknfja7c1"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9577
;; affy from Bioconductor.  The R dependencies are propagated, and zlib is a
;; plain input.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.60.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "0x8h4fk2igv7vykqfvf6v9whmx3344v5rf3gyfajd431xkjldz6k"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/affy")
    ;; "Affymetrix" is capitalized, matching the r-affyio synopsis.
    (synopsis "Methods for Affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9605
;; Bioconductor package "vsn"; all of its dependencies are propagated R
;; packages.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.50.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "vsn" version))
       (sha256
        (base32 "1g6qkpykw99jm2wv2i61dg2ffwk0n8fm4s5pm2q4c024vw5c9b69"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9639
;; Bioconductor package "mzID"; the mixed-case upstream name is recorded in
;; the `upstream-name' property.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.20.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzID" version))
       (sha256
        (base32 "15yd4bdxprw3kg7zj2k652y3yr3si781iw28jqvnkm0gsc23rd0c"))))
    (properties `((upstream-name . "mzID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9669
;; Bioconductor package "pcaMethods" (upstream name kept in `properties').
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.74.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "pcaMethods" version))
       (sha256
        (base32 "0ik82s9bsdj4a1mmv0a3k6yisa92mxx7maf3dvip1r8gqlm3dyng"))))
    (properties `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9699
;; Bioconductor package "MSnbase".  Its propagated inputs include the
;; r-affy, r-mzid, r-pcamethods and r-vsn packages defined earlier in this
;; file.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.8.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnbase" version))
       (sha256
        (base32 "1kl1d7byphnfpmbl5fzbgs68dxskhpsdyx7ka51bpfn0nv3pp492"))))
    (properties `((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9742
;; Bioconductor package "MSnID" (upstream name kept in `properties').
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32 "077n6ljcnnl7q4w0qj8v46vm4sjk9vzzfqf7wsc6lz0wmyzqdng3"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    ;; Verb agreement fixed: "assesses ... and optimizes".
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9781
;; CRAN package "Seurat" (fetched via cran-uri, unlike the Bioconductor
;; packages around it).
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "2.3.4")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "Seurat" version))
       (sha256
        (base32 "0l8bv4i9nzz26mirnva10mq6pimibj24vk7vpvfypgn7xk4942hd"))))
    (properties `((upstream-name . "Seurat")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-dosnow" ,r-dosnow)
       ("r-dplyr" ,r-dplyr)
       ("r-dtw" ,r-dtw)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-foreach" ,r-foreach)
       ("r-fpc" ,r-fpc)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggridges" ,r-ggridges)
       ("r-gplots" ,r-gplots)
       ("r-hdf5r" ,r-hdf5r)
       ("r-hmisc" ,r-hmisc)
       ("r-httr" ,r-httr)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-lars" ,r-lars)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-metap" ,r-metap)
       ("r-mixtools" ,r-mixtools)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reshape2" ,r-reshape2)
       ("r-reticulate" ,r-reticulate)
       ("r-rocr" ,r-rocr)
       ("r-rtsne" ,r-rtsne)
       ("r-sdmtools" ,r-sdmtools)
       ("r-tidyr" ,r-tidyr)
       ("r-tsne" ,r-tsne)))
    (home-page "http://www.satijalab.org/seurat")
    ;; A synopsis is a short noun phrase, not a sentence that starts with the
    ;; name of the software.
    (synopsis "R toolkit for single cell genomics")
    (description
     "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
9844
;; Bioconductor package "aroma.light"; the dot in the upstream name becomes
;; a hyphen in the Guix package name.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "aroma.light" version))
       (sha256
        (base32 "0vfifgpqxjjncbiv6gvlk9jmj14j90r9f30bqk3ks9v1csjnjhrb"))))
    (properties `((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9871
;; Bioconductor package "DESeq" (upstream name kept in `properties').
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.34.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq" version))
       (sha256
        (base32 "0bpiixczbhlyaiinpbl6xrpmv72k2bq76bxnw06gl35m4pgs94p2"))))
    (properties `((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www-huber.embl.de/users/anders/DESeq")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9902
;; Bioconductor package "EDASeq".  Among its propagated inputs are the
;; r-aroma-light and r-deseq packages defined just above.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.16.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "EDASeq" version))
       (sha256
        (base32 "0559ph606ps2g9bwbl0a2knkcs5w581n9igngpjxvk5p56k24gb5"))))
    (properties `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9941
;; Bioconductor package "interactiveDisplayBase"; it propagates only
;; r-biocgenerics and r-shiny.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "interactiveDisplayBase" version))
       (sha256
        (base32 "04xz3dkwan2s5ic1mwkdfnggm0l41mgqfagx160bcsrpkw6z7ark"))))
    (properties `((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9965
;; Bioconductor package "AnnotationHub"; it propagates
;; r-interactivedisplaybase, defined just above, among others.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.14.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationHub" version))
       (sha256
        (base32 "17fgrvcnbii9siv5rq5j09bxhqffx47f6jf10418qvr7hh61ic1g"))))
    (properties `((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-curl" ,r-curl)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
10000
;; Bioconductor package "fastseg".  No `upstream-name' property is set: the
;; upstream name is already all lower case.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "fastseg" version))
       (sha256
        (base32 "1l8mdjpfpgwqdss2ywjkb8b4h55wf8v6kmyxdlvy04ds2hj16sb1"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
10030
;; Bioconductor package "KEGGREST" (upstream name kept in `properties').
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32 "0blpd5a7whd2sswfhqd17h58hg06ymaf80gapdr9ja43hnnlj309"))))
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; Dropped the redundant "provides a package that provides".
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
10054
;; Bioconductor package "gage"; it propagates r-keggrest, defined just
;; above, together with r-annotationdbi and r-graph.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.32.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32 "02g796sb1800ff0f1mq9f2m5wwzpf8pnfzajs49i68dhq2hm01a8"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page "http://www.biomedcentral.com/1471-2105/10/161")
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    ;; Spelling fixed in the last sentence: "funtions" -> "functions".
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
10083
;; Bioconductor package "GenomicFiles" (upstream name kept in `properties').
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFiles" version))
       (sha256
        (base32 "0qf2yj4lfnnk64fk125n8sqms01shfqiik04nasx2z3k129ykpxp"))))
    (properties `((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
10116
;; Bioconductor package "ComplexHeatmap" (upstream name kept in
;; `properties').
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ComplexHeatmap" version))
       (sha256
        (base32 "0s01dzcfj1lmpqfpsbqw7r4858krfzy499lz4cwx4fq3mbyvy2aj"))))
    (properties `((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10146
;; Bioconductor package "DirichletMultinomial".  Besides its propagated R
;; dependencies it takes gsl as a regular input.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.24.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DirichletMultinomial" version))
       (sha256
        (base32 "0vazfjzqy78p5g7dnv30lbqbj4bhq4zafd2wh6gdwy2il1fd78xa"))))
    (properties `((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs
     `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10175
;; Bioconductor package "ensembldb"; all of its dependencies are propagated
;; R packages.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32 "0kzdsfk6mdwlp57sw4j2cf7lx5nc67v5j0xr3iag9kzmgikaq1lb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; Grammar fixed: "annotations ... are", "in addition to retrieving",
    ;; "functionality and data are", "allowing the retrieval of".
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotations for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data are
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
allowing the retrieval of annotations for specific entries like genes encoded
on a chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
10218
;; Bioconductor package "OrganismDbi" (upstream name kept in `properties').
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32 "11pyv56cy4iy095h40k6k0mpjdlh6gsb4ld3s57nfa9nd4ypx3yi"))))
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; Number agreement fixed: "a select methods" -> "a select method".
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10250
;; Bioconductor package "biovizBase"; it propagates r-ensembldb, defined
;; above, among many other R packages.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biovizBase" version))
       (sha256
        (base32 "0v5gvcx180qn5487i1dph9abadw3ggqwp5yzy41jswzbdc8q6sbm"))))
    (properties `((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10292
;; Bioconductor package "ggbio"; it propagates r-biovizbase, r-ensembldb
;; and r-organismdbi, defined above, among many other R packages.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ggbio" version))
       (sha256
        (base32 "0wq49qqzkcn8s19xgaxf2s1j1a563d7pbhhvris6fhxfdjsz4934"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10345
;; CRAN package "gProfileR" (fetched via cran-uri); it propagates r-plyr and
;; r-rcurl.
(define-public r-gprofiler
  (package
    (name "r-gprofiler")
    (version "0.6.7")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "gProfileR" version))
       (sha256
        (base32 "12nwidbnqmnfy5dnqga26byslvdnkrpz2fi19qfcby6xx0wbndk7"))))
    (properties `((upstream-name . "gProfileR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-plyr" ,r-plyr)
       ("r-rcurl" ,r-rcurl)))
    (home-page "https://cran.r-project.org/web/packages/gProfileR/")
    (synopsis "Interface to the g:Profiler toolkit")
    (description
     "This package provides tools for functional enrichment analysis,
gene identifier conversion and mapping homologous genes across related
organisms via the @code{g:Profiler} toolkit.")
    (license license:gpl2+)))
10369
;; Bioconductor package "gQTLBase"; it propagates r-genomicfiles, defined
;; above, among others.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gQTLBase" version))
       (sha256
        (base32 "1lbk1m1mkvbk30flk5pf3pcrnm2s0sj5r48kbjgad39dsvd8zgqx"))))
    (properties `((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10404
;; Bioconductor package "snpStats".  zlib is a regular input; r-zlibbioc is
;; among the propagated R dependencies.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "snpStats" version))
       (sha256
        (base32 "1pplx4pf9bqi7v5v1l74yknc1s61carvbqkf327ky7vbvp0bck33"))))
    (properties `((upstream-name . "snpStats")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10431
;; Bioconductor annotation package "Homo.sapiens".  Its source URI is
;; assembled by hand (see the comment on the `uri' field).
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".  Fetch over
       ;; HTTPS rather than plain HTTP.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "Homo.sapiens_"
                           version ".tar.gz"))
       (sha256
        (base32 "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    (properties `((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene" ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10463
;; Bioconductor package "erma", built with the R build system.
(define-public r-erma
  (package
    (name "r-erma")
    (version "0.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "erma" version))
       (sha256
        (base32 "0hj9iz904rr1y66442lkxjywkw1ydyxxlhmjirawbf09ic5ad4g9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10501
;; Bioconductor package "ldblock", built with the R build system.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ldblock" version))
       (sha256
        (base32 "0xbf4pmhrk5fnd1iz5wzjvdr75v114bwpznhcig4wiqmxc27sips"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-erma" ,r-erma)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-go-db" ,r-go-db)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10533
;; Bioconductor package "gQTLstats", built with the R build system.
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.14.0")
    (properties '((upstream-name . "gQTLstats")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gQTLstats" version))
       (sha256
        (base32 "1sg9kw59dlayj7qxql9pd93d4hmml504sa3kkfpzfh3xri7m5pxf"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10587
;; Bioconductor package "Gviz", built with the R build system.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.26.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Gviz" version))
       (sha256
        (base32
         "0jvcivgw0ahv2rjadxmrww76xambhf7silczmh38nn4yn4qw6w9y"))))
    (properties `((upstream-name . "Gviz")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    ;; The first sentence was "analyses requires"; the verb now agrees with
    ;; the plural subject.
    (description
     "Genomic data analyses require integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10633
;; Bioconductor package "gwascat", built with the R build system.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gwascat" version))
       (sha256
        (base32 "1fnyjydhicq4ayrv0lqjv48h9bd72h40s6l82g1h2ng0icwz38g0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbio" ,r-ggbio)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlstats" ,r-gqtlstats)
       ("r-graph" ,r-graph)
       ("r-gviz" ,r-gviz)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10673
;; Bioconductor package "Sushi", built with the R build system.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.20.0")
    (properties '((upstream-name . "Sushi")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32 "0dv5di0hgbvk9cxnqhyf18mdjl50k6bk00a89r6zgp83rbxwr1r8"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10695
;; Bioconductor package "FitHiC", built with the R build system.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.8.0")
    (properties '((upstream-name . "FitHiC")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32 "15xd8mz7660q4zr9p74mq1pqps4iz7pxp8f9ifn21gwg94aq1avn"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10719
;; Bioconductor package "HiTC", built with the R build system.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.26.0")
    (properties '((upstream-name . "HiTC")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32 "11f96k1707g6milpjgnrjf3b5r42hsrxhb5d8znkcr3y3mrskdbj"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10748
;; Bioconductor package "qvalue", built with the R build system.
(define-public r-qvalue
  (package
    (name "r-qvalue")
    (version "2.14.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "qvalue" version))
       (sha256
        (base32
         "0kxavzm1j2mk26qicmjm90nxx4w5h3dxighzks7wzihay3k8cysc"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-reshape2" ,r-reshape2)))
    ;; Use the HTTPS form of the GitHub URL, like the other GitHub home pages
    ;; in this file.
    (home-page "https://github.com/jdstorey/qvalue")
    (synopsis "Q-value estimation for false discovery rate control")
    (description
     "This package takes a list of p-values resulting from the simultaneous
testing of many hypotheses and estimates their q-values and local @dfn{false
discovery rate} (FDR) values. The q-value of a test measures the proportion
of false positives incurred when that particular test is called significant.
The local FDR measures the posterior probability the null hypothesis is true
given the test's p-value. Various plots are automatically generated, allowing
one to make sensible significance cut-offs. The software can be applied to
problems in genomics, brain imaging, astrophysics, and data mining.")
    ;; Any version of the LGPL.
    (license license:lgpl3+)))
10777
;; Bioconductor package "HDF5Array", built with the R build system.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.10.1")
    (properties '((upstream-name . "HDF5Array")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HDF5Array" version))
       (sha256
        (base32 "1qwdsygcadl58qj598hfyvs8hp0hqcl9ghnhknahrlhmb7k2bd2d"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-rhdf5" ,r-rhdf5)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description
     "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10803
;; Bioconductor package "Rhdf5lib".  The source snippet removes the bundled
;; "src/winlib/" directory, and a custom phase after 'unpack points the build
;; at the "hdf5" input instead of the bundled HDF5 and drops every reference
;; to libsz.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.4.2")
    (properties '((upstream-name . "Rhdf5lib")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rhdf5lib" version))
       (sha256
        (base32 "06bxd3wz8lrvh2hzvmjpdv4lvzj5lz9353bw5b3zb98cb8w9r2j5"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled binaries
           (delete-file-recursively "src/winlib/")
           #t))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5 (assoc-ref inputs "hdf5")))
               (delete-file "configure")
               (delete-file "configure.ac")
               ;; Do not make other packages link with the proprietary libsz.
               (substitute* "R/zzz.R"
                 (("'%s/libhdf5_cpp.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_cpp.a %s/libhdf5.a %s/libhdf5.a -lz'")
                 (("'%s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5.a %s/libhdf5.a -lz'"))
               (with-directory-excursion "src"
                 ;; Unpack the HDF5 sources under the directory name "hdf5".
                 (invoke "tar" "xvf" (assoc-ref inputs "hdf5-source"))
                 (rename-file (string-append "hdf5-"
                                             ,(package-version hdf5-1.10))
                              "hdf5")
                 ;; Remove timestamp and host system information to make
                 ;; the build reproducible.
                 (substitute* "hdf5/src/libhdf5.settings.in"
                   (("Configured on: @CONFIG_DATE@")
                    "Configured on: Guix")
                   (("Uname information:.*")
                    "Uname information: Linux\n")
                   ;; Remove unnecessary store reference.
                   (("C Compiler:.*")
                    "C Compiler: GCC\n"))
                 ;; "configure" was deleted above, so turn the template into
                 ;; the real Makevars by hand.
                 (rename-file "Makevars.in" "Makevars")
                 (substitute* "Makevars"
                   (("HDF5_CXX_LIB=.*")
                    (string-append "HDF5_CXX_LIB="
                                   hdf5 "/lib/libhdf5_cpp.a\n"))
                   (("HDF5_LIB=.*")
                    (string-append "HDF5_LIB="
                                   hdf5 "/lib/libhdf5.a\n"))
                   (("HDF5_CXX_INCLUDE=.*") "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                   (("HDF5_INCLUDE=.*") "HDF5_INCLUDE=./hdf5/src\n")
                   ;; szip is non-free software
                   (("cp \\$\\{SZIP_LIB\\}.*") "")
                   (("\\$\\{USER_LIB_DIR\\}libsz.a") "")))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("hdf5" ,hdf5-1.10)))
    (native-inputs
     `(("hdf5-source" ,(package-source hdf5-1.10))))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description
     "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10874
;; Bioconductor package "beachmat", built with the R build system.  hdf5 and
;; zlib are plain inputs; the R dependencies are propagated.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "beachmat" version))
       (sha256
        (base32 "07zgmms0qg8gw7x0js46965bbhpfj2aa1h5ixdz9r332bxv9cdmr"))))
    (build-system r-build-system)
    (inputs
     `(("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description
     "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10903
;; Bioconductor package "SingleCellExperiment", built with the R build
;; system.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.4.1")
    (properties '((upstream-name . "SingleCellExperiment")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SingleCellExperiment" version))
       (sha256
        (base32 "12139kk9cqgzpm6f3cwdsq31gj5lxamz2q939dy9fa0fa54gdaq4"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description
     "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10930
;; Bioconductor package "scater", built with the R build system.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scater" version))
       (sha256
        (base32 "0rijhy7g5qmcn927y1wyd63la1fhyar9fv1hccsqd23jd98yc55a"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-matrix" ,r-matrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description
     "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
10966
;; Bioconductor package "scran", built with the R build system.
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.10.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scran" version))
       (sha256
        (base32 "07mgilr3gq3lnrm1fjm9zhz4w7970bjhsykln1drqy9gkzj5sn7g"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-edger" ,r-edger)
       ("r-igraph" ,r-igraph)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description
     "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
11005
;; Bioconductor package "DelayedMatrixStats", built with the R build system.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedMatrixStats" version))
       (sha256
        (base32
         "03fk2avl1vyjv2wslczkc82qr0zmp1ra8iimd47pbmnnm839ly4w"))))
    (properties
     `((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    ;; The last sentence was "both ... is minimized"; the verb now agrees
    ;; with the plural subject.
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time are minimized.")
    (license license:expat)))
11039
;; CRAN package "phangorn" (fetched via cran-uri), built with the R build
;; system.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "phangorn" version))
       (sha256
        (base32 "0xc8k552nxczy19jr0xjjagrzc8x6lafasgk2c099ls8bc1yml1i"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
11067
;; R package "dropbead", built from a pinned Git commit of the upstream
;; repository.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      ;; git-version builds "<version>-<revision>.<first 7 chars of commit>",
      ;; the same string the previous hand-written string-append produced.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
11097
;; Private variant of htslib (not exported with define-public) built from a
;; pinned commit of the lomereiter fork.  It inherits every other field from
;; `htslib' and adds autoconf and automake to the native inputs.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lomereiter/htslib.git")
               (commit commit)))
         ;; Yields "htslib-<version>-checkout".
         (file-name (git-file-name "htslib" version))
         (sha256
          (base32 "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
11118
;; Sambamba, built from a Git tag with the GNU build system.  There is no
;; configure step; custom phases patch the Makefile to use ldc2 and the
;; lz4/htslib inputs, copy the BioD and undeaD sources into the tree, and
;; install the resulting binary by hand.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lomereiter/sambamba.git")
             (commit (string-append "v" version))))
       ;; Same file name as before, spelled with the helper already used by
       ;; other git-fetch origins in this file.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0k0cz3qcv98p6cq09zlbgnjsggxcqbcmzxg5zikgcgbr2nfq4lry"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             (substitute* "gen_ldc_version_info.py"
               (("/usr/bin/env.*") (which "python3")))
             (substitute* "Makefile"
               ;; We use ldc2 instead of ldmd2 to compile sambamba.
               (("\\$\\(shell which ldmd2\\)") (which "ldc2")))
             #t))
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             (copy-recursively (assoc-ref inputs "undead") "undeaD")
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link against the lz4 and htslib inputs instead of the
             ;; static archives built from bundled copies.
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" lz4-static htslib-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; install-file creates the target directory itself, so no
             ;; separate mkdir-p is needed.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "bin/sambamba" bin)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python-minimal)
       ("biod"
        ,(let ((commit "4f1a7d2fb7ef3dfe962aa357d672f354ebfbe42e"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1k5pdjv1qvi0a3rwd1sfq6zbj37l86i7bf710m4c0y6737lxj426")))))
       ("undead"
        ,(let ((commit "9be93876982b5f14fcca60832563b3cd767dd84d"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/undeaD.git")
                   (commit commit)))
             (file-name (string-append "undead-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "1xfarj0nqlmi5jd1vmcmm7pabzaf9hxyvk6hp0d6jslb5k9r8r3d")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "https://lomereiter.github.io/sambamba/")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
11208
;; Ritornello, built from a Git tag with the GNU build system.  There is no
;; configure step; the sam.h includes are rewritten to <samtools/sam.h> and
;; the binary is installed by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/KlugerLab/Ritornello.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           ;; This phase needs none of the phase keyword arguments.
           (lambda _
             (substitute* '("src/SamStream.h"
                            "src/FLD.cpp")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; install-file creates the target directory itself, so no
             ;; separate mkdir-p is needed.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "bin/Ritornello" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
11253
;; Trim Galore!, a single script taken from a Git tag.  Nothing is configured
;; or compiled: references to cutadapt, gzip and gunzip inside the script are
;; replaced with paths under the corresponding inputs, and the script is
;; installed by hand.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.4.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore.git")
             (commit version)))
       ;; Same file name as before, spelled with the helper already used by
       ;; other git-fetch origins in this file.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0x5892l48c816pf00wmnz5vq0zq6170d3xc8zrxncd4jcz7h1p71"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "trim_galore"
               (("\\$path_to_cutadapt = 'cutadapt'")
                (string-append "$path_to_cutadapt = '"
                               (assoc-ref inputs "cutadapt")
                               "/bin/cutadapt'"))
               (("\\| gzip")
                (string-append "| "
                               (assoc-ref inputs "gzip")
                               "/bin/gzip"))
               (("\"gunzip")
                (string-append "\""
                               (assoc-ref inputs "gzip")
                               "/bin/gunzip")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; install-file creates the target directory itself, so no
             ;; separate mkdir-p is needed.
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (install-file "trim_galore" bin)
               #t))))))
    ;; The source is a Git checkout, not a zip archive, and no phase calls
    ;; unzip, so the former "unzip" native input is dropped.
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("cutadapt" ,cutadapt)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
11310
;; GESS is a collection of Python scripts.  There is no build step: the
;; whole source tree is copied into a site-packages directory, GESS.py is
;; given a shebang and a headless Matplotlib backend, wrapped so that it
;; finds its modules, and finally linked into bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    ;; The interpreter listed in 'inputs' is python-2, so
                    ;; the site-packages directory must carry python-2's
                    ;; major.minor version.  Taking the version from the
                    ;; bare 'python' variable would name the directory
                    ;; after a different package than the one in use.
                    (target (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/"))
                    (script (string-append target "GESS.py")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod script #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* script
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-program script
                 `("PYTHONPATH" ":" prefix (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink script (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)))
    (home-page "http://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
11374
;; PHYLIP is built from its "src" directory with the Unix makefile; the
;; makefile's own "install" target drops the programs into ../exe, from
;; where the custom install phase copies them to the output.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags '("-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out         (assoc-ref outputs "out"))
                    (bindir      (string-append out "/bin"))
                    (executables (find-files "../exe" ".*")))
               (mkdir-p bindir)
               (for-each (lambda (exe)
                           (install-file exe bindir))
                         executables)
               #t))))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
11411
;; The Integrative Modeling Platform, built with CMake against Python 2.
;; The numeric Python libraries are propagated so that they are available
;; alongside IMP wherever it is installed.
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    ;; FIXME: Some tests fail because they produce warnings, others fail
    ;; because the PYTHONPATH does not include the modeller's directory.
    (arguments
     '(#:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; Most of IMP is under the GNU Lesser GPL (full text in COPYING.LGPL);
    ;; some modules are under the GNU GPL instead (see COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
11455
;; TADbit is a Python 2 library.  Its setup.py rewrites two files in the
;; source tree and tries to drop bash completions into the user's home
;; directory; both are worked around before the regular build starts.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/3DGenomes/TADbit.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "07g3aj648prmsvxp9caz5yl41k0y0647vxh0f5p3w8376mfiljd0"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completion-dir
                    (string-append (assoc-ref outputs "out")
                                   "/etc/bash_completion.d")))
               ;; setup.py opens these files for writing
               (for-each (lambda (file)
                           (chmod file #o664))
                         '("_pytadbit/_version.py"
                           "README.rst"))

               ;; Install the bash completions under the output instead of
               ;; the home directory, and under a non-hidden file name.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completion-dir "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
11514
;; The kentUtils sources mix free and nonfree directories.  The origin
;; snippet prunes everything under src/ that is not on the allow-list and
;; edits the makefiles so that only the remaining free tools are built.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let ((keep '("." ".."
                         "utils" "lib" "inc" "tagStorm"
                         "parasol" "htslib")))
             ;; Walk every entry of src/ that is not on the allow-list and
             ;; remove it when it is a directory; plain files are left.
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           (when (eq? 'directory (stat:type (stat path)))
                             (delete-file-recursively path))))
                       (scandir "src"
                                (lambda (entry)
                                  (not (member entry keep))))))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Place the separately fetched samtabix sources in a
             ;; "samtabix" directory inside the build tree.
             (let ((samtabix (assoc-ref inputs "samtabix")))
               (copy-recursively samtabix "samtabix")
               #t)))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (copy-recursively "bin"
                               (string-append (assoc-ref outputs "out")
                                              "/bin"))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb" ,mariadb)
       ("openssl" ,openssl)))
    (home-page "http://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
11630
;; F-Seq is packaged from a fixed upstream commit.  Bundled jars are removed
;; from the source, the jar is built with Ant, and the launcher script is
;; patched to use absolute paths before being installed.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      (version (string-append "1.1-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out         (assoc-ref outputs "out"))
                      (bindir      (string-append out "/bin"))
                      (docdir      (string-append out "/share/doc/f-seq"))
                      (libdir      (string-append out "/lib"))
                      (launcher    "bin/linux/fseq")
                      (commons-cli (string-append
                                    (assoc-ref inputs "java-commons-cli")
                                    "/share/java/commons-cli.jar")))
                 (mkdir-p out)
                 (mkdir-p docdir)
                 ;; Point the launcher at the java found on PATH, at the
                 ;; commons-cli jar from the inputs, and at its own
                 ;; installation directory.
                 (substitute* launcher
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    commons-cli)
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bindir "\n")))
                 (install-file "README.txt" docdir)
                 (install-file launcher bindir)
                 (install-file "build~/fseq.jar" libdir)
                 (copy-recursively "lib" libdir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
11689
;; Bismark is a set of Perl scripts.  Nothing is compiled: the install phase
;; copies the scripts, the user guide and the report templates into the
;; output and hard-codes the location of gunzip in the installed scripts.
(define-public bismark
  (package
    (name "bismark")
    (version "0.19.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark.git")
             (commit version)))
       ;; Yields NAME-VERSION-checkout.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0yb5l36slwg02fp4b1jdlplgljcsxgqfzvzihzdnphd87dghcc84"))
       (snippet
        '(begin
           ;; highcharts.js is non-free software.  The code is available under
           ;; CC-BY-NC or proprietary licenses only.
           (delete-file "bismark_sitrep/highcharts.js")
           #t))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out         (assoc-ref outputs "out"))
                    (bindir      (string-append out "/bin"))
                    (sharedir    (string-append out "/share/bismark"))
                    (docdir      (string-append out "/share/doc/bismark"))
                    (sitrep      (string-append sharedir "/bismark_sitrep"))
                    (guides      '("Docs/Bismark_User_Guide.html"))
                    (programs    '("bismark"
                                   "bismark_genome_preparation"
                                   "bismark_methylation_extractor"
                                   "bismark2bedGraph"
                                   "bismark2report"
                                   "coverage2cytosine"
                                   "deduplicate_bismark"
                                   "filter_non_conversion"
                                   "bam2nuc"
                                   "bismark2summary")))
               ;; The report generator looks for its templates next to
               ;; itself; redirect it to their installed location.
               (substitute* "bismark2report"
                 (("\\$RealBin/bismark_sitrep") sitrep))
               (mkdir-p sharedir)
               (mkdir-p docdir)
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)
               (for-each (lambda (guide)
                           (install-file guide docdir))
                         guides)
               (copy-recursively "Docs/Images"
                                 (string-append docdir "/Images"))
               (copy-recursively "bismark_sitrep" sitrep)

               ;; Fix references to gunzip
               (substitute* (map (lambda (program)
                                   (string-append bindir "/" program))
                                 programs)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
11775
;; PAML is built from its "src" directory with plain make.  There is no
;; configure script, so the 'configure phase is reused to patch a
;; hard-coded shell path and to enter the source directory.
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The build leaves the tools in the current ("src")
             ;; directory; the documentation lives one level up.
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin    (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11823
;; kallisto bundles htslib and builds it through CMake's ExternalProject.
;; The CMake files are edited so that the bundled copy is skipped and the
;; executable links against the htslib provided as an input.
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.44.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pachterlab/kallisto.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0nj382jiywqnpgvyhichajpkkh5r0bapn43f4dx40zdaq5v4m40m"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Top level: fence the ExternalProject_Add(...) call inside
             ;; an "if (NEVER)" block and comment out the include path
             ;; that refers to the bundled build.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" all)
                (string-append "if (NEVER)\n" all))
               (("^\\)")
                ")\nendif(NEVER)")
               (("include_directories\\(\\$\\{htslib_PREFIX.*" all)
                (string-append "# " all)))
             ;; src/: link against "hts" instead of the bundled static
             ;; archive and drop the bundled header directory.
             (substitute* "src/CMakeLists.txt"
               (("target_link_libraries\\(kallisto kallisto_core pthread \
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)")
                ""))
             #t)))))
    (inputs
     `(("hdf5" ,hdf5)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "http://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11872
;; libgff: GFF/GTF parsing code packaged as a stand-alone library and
;; built with CMake.
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kingsford-Group/libgff.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6vfjnq7a2mianipscbshrvbncss8z4zkgkbjw754p9043nfkps"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f))                    ; no tests included
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
11895
;; libdivsufsort: suffix-array construction library built with CMake.  The
;; 64-bit variant of the library is enabled as well.
(define-public libdivsufsort
  (package
    (name "libdivsufsort")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/y-256/libdivsufsort.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0fgdz9fzihlvjjrxy01md1bv9vh12rkgkwbm90b1hj5xpbaqp7z2"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; there are no tests
       ;; Needed for rapmap and sailfish.
       #:configure-flags (list "-DBUILD_DIVSUFSORT64=ON")))
    (home-page "https://github.com/y-256/libdivsufsort")
    (synopsis "Lightweight suffix-sorting library")
    (description "libdivsufsort is a software library that implements a
lightweight suffix array construction algorithm. This library provides a
simple and an efficient C API to construct a suffix array and a
Burrows-Wheeler transformed string from a given string over a constant-size
alphabet. The algorithm runs in O(n log n) worst-case time using only 5n+O(1)
bytes of memory space, where n is the length of the string.")
    (license license:expat)))
11924
;; Sailfish expects to download and build several of its dependencies
;; itself.  The phases below disable those downloads, unpack the separately
;; fetched RapMap sources into the locations the build expects, and point
;; the CMake files at the libraries provided as inputs.
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/kingsfordgroup/sailfish.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR=" boost "/include/")
               (string-append "-DBOOST_LIBRARYDIR=" boost "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              (string-join '("-lboost_iostreams"
                                             "-lboost_filesystem"
                                             "-lboost_system"
                                             "-lboost_thread"
                                             "-lboost_timer"
                                             "-lboost_chrono"
                                             "-lboost_program_options")
                                           " "))
               "-DBoost_FOUND=TRUE"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rapmap   (assoc-ref inputs "rapmap"))
                   (scratch  "/tmp/rapmap")
                   (src-dir  "external/install/src/rapmap/")
                   (inc-dir  "external/install/include/rapmap/"))
               ;; Extract the RapMap input into a scratch directory, then
               ;; copy its sources and headers to where the build looks
               ;; for them.
               (mkdir-p scratch)
               (invoke "tar" "xf" rapmap
                       "-C" scratch
                       "--strip-components=1")
               (mkdir-p src-dir)
               (mkdir-p inc-dir)
               (for-each (lambda (source-file)
                           (install-file source-file src-dir))
                         (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
               (copy-recursively "/tmp/rapmap/include" inc-dir)
               #t)))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jellyfish     (assoc-ref inputs "jellyfish"))
                   (libdivsufsort (assoc-ref inputs "libdivsufsort"))
                   (eigen         (assoc-ref inputs "eigen")))
               ;; Drop the include of jellyfish's config.h from every file
               ;; that mentions it.
               (substitute* '("src/SailfishIndexer.cpp"
                              "src/SailfishUtils.cpp"
                              "src/SailfishQuantify.cpp"
                              "src/FASTAParser.cpp"
                              "include/PCA.hpp"
                              "include/SailfishUtils.hpp"
                              "include/SailfishIndex.hpp"
                              "include/CollapsedEMOptimizer.hpp"
                              "src/CollapsedEMOptimizer.cpp")
                 (("#include \"jellyfish/config.h\"") ""))
               ;; Replace references to the bundled install tree with the
               ;; corresponding files of the inputs.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                  (string-append jellyfish "/include/jellyfish-"
                                 ,(package-version jellyfish)))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                  (string-append jellyfish "/lib/libjellyfish-2.0.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append libdivsufsort "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append libdivsufsort "/lib/libdivsufsort64.so")))
               (substitute* "CMakeLists.txt"
                 ;; Don't prefer static libs
                 (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
                 (("find_package\\(Jellyfish.*") "")
                 (("ExternalProject_Add\\(libjellyfish") "message(")
                 (("ExternalProject_Add\\(libgff") "message(")
                 (("ExternalProject_Add\\(libsparsehash") "message(")
                 (("ExternalProject_Add\\(libdivsufsort") "message("))

               ;; Ensure that Eigen headers can be found
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append (getenv "CPLUS_INCLUDE_PATH")
                                      ":" eigen "/include/eigen3"))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://www.cs.cmu.edu/~ckingsf/software/sailfish")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
12075
;; Private (non-exported) package built from the COMBINE-lab copy of
;; staden-io_lib.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib.git")
             (commit (string-append "v" version))))
       ;; Yields NAME-VERSION-checkout.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-tests? #f))           ; not supported
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
12116
;; Private (non-exported) package built from the COMBINE-lab copy of
;; spdlog.
(define spdlog-for-salmon
  (package
    (name "spdlog")
    (version "0.14.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/spdlog.git")
             (commit (string-append "v" version))))
       ;; Yields NAME-VERSION-checkout.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13730429gwlabi432ilpnja3sfvy0nn2719vnhhmii34xcdyc57q"))))
    (build-system cmake-build-system)
    (home-page "https://github.com/COMBINE-lab/spdlog")
    (synopsis "Very fast C++ logging library")
    (description "Spdlog is a very fast header-only C++ logging library with
performance as its primary goal.")
    (license license:expat)))
12136
;; Variant of bwa, modified for use with Salmon.  Besides the program it
;; installs a static library and the headers, so that Salmon does not have
;; to build bwa as part of its own build.
(define bwa-for-salmon
  (package (inherit bwa)
    (name "bwa")
    (version "0.7.12.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/bwa.git")
             (commit (string-append "v" version))))
       ;; Yields "bwa-for-salmon-VERSION-checkout".
       (file-name (git-file-name "bwa-for-salmon" version))
       (sha256
        (base32
         "1z2qa64y0c5hky10510x137mnzlhz6k8qf27csw4w9j6qihq95gb"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out        (assoc-ref outputs "out"))
                    (bindir     (string-append out "/bin"))
                    (libdir     (string-append out "/lib"))
                    (docdir     (string-append out "/share/doc/bwa"))
                    (mandir     (string-append out "/share/man/man1"))
                    (includedir (string-append out "/include/bwa")))
               (install-file "bwa" bindir)
               (install-file "README.md" docdir)
               (install-file "bwa.1" mandir)
               (install-file "libbwa.a" libdir)
               (mkdir-p libdir)
               (mkdir-p includedir)
               ;; Every header found anywhere in the build tree is
               ;; installed flat into include/bwa.
               (for-each (lambda (header)
                           (install-file header includedir))
                         (find-files "." "\\.h$"))
               #t))))))))
12177
(define-public salmon
  (package
    (name "salmon")
    (version "0.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/salmon.git")
             (commit (string-append "v" version))))
       ;; Expands to "<name>-<version>-checkout".
       (file-name (git-file-name name version))
       (sha256
        (base32 "1zi1ff4i7y2ykk0vdzysgwzzzv166vg2x77pj1mf4baclavxj87a"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of the eigen3 headers.
           (delete-file-recursively "include/eigen3/")
           #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR=" boost "/include/")
               (string-append "-DBOOST_LIBRARYDIR=" boost "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              "-lboost_iostreams "
                              "-lboost_filesystem "
                              "-lboost_system "
                              "-lboost_thread "
                              "-lboost_timer "
                              "-lboost_chrono "
                              "-lboost_program_options")
               "-DBoost_FOUND=TRUE"
               "-DTBB_LIBRARIES=tbb tbbmalloc"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.  Comment
         ;; out the lookup; the locations are passed as configure flags.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         ;; Replace the call to getVersionMessage() with an empty string.
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         ;; Copy the RapMap sources and headers from the "rapmap" input to
         ;; the place where the build expects a fetched copy, then remove
         ;; a handful of files from that copy.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src-dir "external/install/src/rapmap/")
                   (include-dir "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src-dir)
               (mkdir-p include-dir)
               (for-each (lambda (source-file)
                           (install-file source-file src-dir))
                         (find-files (string-append rapmap "/src")
                                     "\\.(c|cpp)"))
               (copy-recursively (string-append rapmap "/include")
                                 include-dir)
               (for-each delete-file
                         (append
                          (map (lambda (header)
                                 (string-append include-dir header))
                               '("xxhash.h"
                                 "FastxParser.hpp"
                                 "concurrentqueue.h"
                                 "FastxParserThreadUtils.hpp"))
                          (map (lambda (source-file)
                                 (string-append src-dir source-file))
                               '("FastxParser.cpp"
                                 "xxhash.c")))))
             #t))
         ;; Point the build at the libraries provided as inputs instead of
         ;; the copies it would otherwise build under external/install.
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((jellyfish-dir (assoc-ref inputs "jellyfish"))
                    (divsufsort-dir (assoc-ref inputs "libdivsufsort"))
                    (stadenio-dir (assoc-ref inputs "libstadenio-for-salmon"))
                    (bwa-dir (assoc-ref inputs "bwa"))
                    (eigen-dir (assoc-ref inputs "eigen"))
                    (extra-includes (string-append bwa-dir
                                                   "/include/bwa"
                                                   ":"
                                                   eigen-dir
                                                   "/include/eigen3")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                  (string-append jellyfish-dir
                                 "/include/jellyfish-" ,(package-version jellyfish)))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                  (string-append jellyfish-dir
                                 "/lib/libjellyfish-2.0.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append divsufsort-dir
                                 "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                  (string-append stadenio-dir
                                 "/lib/libstaden-read.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libbwa.a")
                  (string-append bwa-dir "/lib/libbwa.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append divsufsort-dir
                                 "/lib/libdivsufsort64.so")))
               (substitute* "CMakeLists.txt"
                 ;; Don't prefer static libs
                 (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
                 (("set\\(TBB_LIBRARIES") "message(")
                 (("find_package\\(Jellyfish.*") "")
                 ;; Turn every external project declaration into a
                 ;; harmless message() call.
                 (("ExternalProject_Add\\(libcereal") "message(")
                 (("ExternalProject_Add\\(libbwa") "message(")
                 (("ExternalProject_Add\\(libjellyfish") "message(")
                 (("ExternalProject_Add\\(libgff") "message(")
                 (("ExternalProject_Add\\(libtbb") "message(")
                 (("ExternalProject_Add\\(libspdlog") "message(")
                 (("ExternalProject_Add\\(libdivsufsort") "message(")
                 (("ExternalProject_Add\\(libstadenio") "message(")
                 (("ExternalProject_Add_Step\\(") "message("))

               ;; Ensure that all headers can be found
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append (getenv "CPLUS_INCLUDE_PATH")
                                      ":"
                                      extra-includes))
               (setenv "CPATH" extra-includes))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bwa" ,bwa-for-salmon)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ;; RapMap sources matching this salmon release; consumed by the
       ;; 'prepare-rapmap' phase.
       ("rapmap"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/COMBINE-lab/RapMap.git")
                 (commit (string-append "salmon-v" version))))
           (file-name (string-append "rapmap-salmon-v" version "-checkout"))
           (sha256
            (base32
             "1yc12yqsz6f0r8sg1qnk57xg34aqwc9jbqq6gd5ys28xw3plj98p"))))
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("spdlog-for-salmon" ,spdlog-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
12341
;; Python library for ".loom" files, fetched from PyPI.
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "loompy" version))
       (sha256
        (base32 "1drgv8j1hxqzzpnfg272x9djb6j8qr798w1pc2x8ikmfgyd9gh51"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f))                    ;there are no tests
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
12368
12369 ;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      ;; No release number is used: the version is made of the revision
      ;; counter and the first seven characters of the commit.
      (version (string-append "0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester.git")
               (commit commit)))
         ;; Expands to "<name>-<version>-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32 "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled jars and pre-built classes
             (delete-file-recursively "forester/java/resources")
             (delete-file-recursively "forester/java/classes")
             (for-each delete-file (find-files "forester/java/" "\\.jar$"))
             ;; Delete bundled applications
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; The remaining phases operate on paths relative to
           ;; "forester/java".
           (add-after 'unpack 'chdir
             (lambda _ (chdir "forester/java") #t))
           ;; Rewrite build.xml through SXML, writing the result to a
           ;; temporary file that then replaces the original.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (let ((rules
                      (list
                       ;; Remove all unjar tags to avoid repacking classes.
                       (cons 'unjar (lambda _ '()))
                       ;; Every other element and all text pass through
                       ;; unchanged.
                       (cons '*default*
                             (lambda (tag . kids) (cons tag kids)))
                       (cons '*text*
                             (lambda (_ txt) txt)))))
                 (call-with-output-file "build.xml.new"
                   (lambda (port)
                     (sxml->xml
                      (pre-post-order
                       (with-input-from-file "build.xml"
                         (lambda _ (xml->sxml #:trim-whitespace? #t)))
                       rules)
                      port))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
12444
;; Version 1.005 of forester, built from the source jar published on Maven.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://search.maven.org/remotecontent?"
                           "filepath=org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32 "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml, copy the two
         ;; resource files provided as inputs next to it, and make the
         ;; build file refer to those copies.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rules
                    (list
                     ;; Remove all unjar tags to avoid repacking classes.
                     (cons 'unjar (lambda _ '()))
                     ;; Every other element and all text pass through
                     ;; unchanged.
                     (cons '*default*
                           (lambda (tag . kids) (cons tag kids)))
                     (cons '*text*
                           (lambda (_ txt) txt)))))
               (call-with-output-file "build.xml"
                 (lambda (port)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "src/build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     rules)
                    port))))
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; Throw instead of exporting, and wrap the rest of the
             ;; original statement in a Java comment.
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _ (delete-file-recursively "src/classes") #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
12539
;; Core module of BioJava.  The other java-biojava-* packages in this file
;; inherit from this definition.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       ;; Expands to "<name>-<version>-checkout".
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude (list "**/SearchIOTest.java"
                            "**/BlastXMLParserTest.java"
                            "**/GenbankCookbookTest.java"
                            "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
12595
;; The "biojava-phylo" module of the BioJava source tree.  Source, version,
;; build system, home page and license are inherited from
;; java-biojava-core.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12631
;; The "biojava-alignment" module of the BioJava source tree.  Source,
;; version, build system, home page and license are inherited from
;; java-biojava-core.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12675
;; Version 4.0.0 of java-biojava-core.  Only the version and the source
;; differ from the inherited definition.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       ;; Expands to "<name>-<version>-checkout".
       (file-name (git-file-name name version))
       (sha256
        (base32 "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
12689
;; The "biojava-phylo" module built from the 4.0 sources.  Source, version,
;; build system, home page and license are inherited from
;; java-biojava-core-4.0.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12725
;; The "biojava-alignment" module built from the 4.0 sources.  Source,
;; version, build system, home page and license are inherited from
;; java-biojava-core-4.0.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Place the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12769
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32 "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             (let* (;; Directories of all inputs whose label starts with
                    ;; "java-", except for "java-testng".
                    (java-dirs
                     (filter-map (match-lambda
                                   ((label . directory)
                                    (and (string-prefix? "java-" label)
                                         (not (string=? label "java-testng"))
                                         directory)))
                                 inputs))
                    (jars (append-map (lambda (directory)
                                        (find-files directory "\\.jar$"))
                                      java-dirs)))
               (for-each (lambda (jar)
                           (symlink jar
                                    (string-append "jar/lib/" (basename jar))))
                         jars))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/java/"))
                    (lib (string-append share "/lib/"))
                    (scripts '("BAMTagHistogram"
                               "BAMTagofTagCounts"
                               "BaseDistributionAtReadPosition"
                               "CollapseBarcodesInPlace"
                               "CollapseTagWithContext"
                               "ConvertToRefFlat"
                               "CreateIntervalsFiles"
                               "DetectBeadSynthesisErrors"
                               "DigitalExpression"
                               "Drop-seq_alignment.sh"
                               "FilterBAM"
                               "FilterBAMByTag"
                               "GatherGeneGCLength"
                               "GatherMolecularBarcodeDistributionByGene"
                               "GatherReadQualityMetrics"
                               "PolyATrimmer"
                               "ReduceGTF"
                               "SelectCellsByNumTranscripts"
                               "SingleCellRnaSeqMetricsCollector"
                               "TagBamWithReadSequenceExtended"
                               "TagReadWithGeneExon"
                               "TagReadWithInterval"
                               "TrimStartingSequence"
                               "ValidateReference"))
                    ;; Locations of the scripts once they are installed.
                    (installed-scripts
                     (map (lambda (script)
                            (string-append bin "/" script))
                          scripts)))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; Make the installed scripts call "java" by the file name
               ;; that 'which' reports, and look for jars in SHARE.
               (substitute* installed-scripts
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    (lib (string-append share "/lib/"))
                    (links (find-files "jar/lib" "\\.jar$")))
               ;; Re-create each link from jar/lib in the output, pointing
               ;; at the same target.
               (for-each (lambda (link)
                           (symlink (readlink link)
                                    (string-append lib (basename link))))
                         links))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12910
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32 "05gn658zpj9xki5dbs728z9zxq1mcm25hkwr5vzwqxsfi15l5f2l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               ;; Keep the trim_galore entry but cut off whatever follows
               ;; it on the same line.
               (("(^ tests/test_trim_galore/test.sh).*" _ kept) kept)
               ;; Remove the two other entries.
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
12978
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32 "19a7dclqq0b4kqg3phiz4d4arlwfp34nm3z0rf1gkqdpsy7gghp3"))))
    (build-system gnu-build-system)
    (arguments
     ;; parts of the tests rely on access to the network
     '(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
13048
;; PiGx BSseq pipeline, built with the GNU build system from the release
;; tarball published on GitHub.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                           "releases/download/v" version
                           "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32
         "0l97wvkq4diq8lcarraj33bby1zzf0w804jwi8mlc5qddp8idwhy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The readr package is picky about timezones, so set TZ and point
         ;; TZDIR at the zoneinfo directory of the "tzdata" input before the
         ;; 'check' phase runs.
         (add-before 'check 'set-timezone
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo)
               #t))))))
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
13107
;; PiGx scRNAseq pipeline, built with the GNU build system from the release
;; tarball published on GitHub.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                           "releases/download/v" version
                           "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "131zarirv16w8653m0d66jgjnwqfsxqc0hix0rypssz4d83bl51j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Pass the jar locations inside the "java-picard" and "dropseq-tools"
       ;; inputs to the configure script.
       (let ((picard  (assoc-ref %build-inputs "java-picard"))
             (dropseq (assoc-ref %build-inputs "dropseq-tools")))
         (list (string-append "PICARDJAR=" picard
                              "/share/java/picard.jar")
               (string-append "DROPSEQJAR=" dropseq
                              "/share/java/dropseq.jar")))))
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("dropseq-tools" ,dropseq-tools)
       ("fastqc" ,fastqc)
       ("java-picard" ,java-picard-2.10.3) ; same as for dropseq
       ("java" ,icedtea-8)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake)
       ("star" ,star)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
13178
;; Umbrella package that takes the four individual PiGx pipelines as inputs.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                           "releases/download/v" version
                           "/pigx-" version ".tar.gz"))
       (sha256
        (base32
         "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python)
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
13214
;; Mantis is fetched from a pinned upstream Git commit and built with CMake.
(define-public mantis
  (let ((commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84")
        (revision "1"))
    (package
      (name "mantis")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/splatlab/mantis.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
      (build-system cmake-build-system)
      (arguments '(#:tests? #f))        ; there are none
      (inputs
       `(("sdsl-lite" ,sdsl-lite)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      ;; uses __uint128_t and inline assembly
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3))))
13249
;; CRAN package "diversitree", built with the R build system.
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-10")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "0gh4rcrp0an3jh8915i1fsxlgyfk7njywgbd5ln5r2jhr085kpz7"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs
     `(("fftw" ,fftw)
       ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ;; The label matches the package variable name, like every other
       ;; input in this list.
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\". Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))
13280
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full.git")
               (commit commit)))
         ;; Use the same helper as the other Git-based packages in this file
         ;; to name the checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure script; patch the makefile instead so
           ;; that it looks for headers in the "samtools" input and links
           ;; with pthread.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* "makefile"
                 (("-I \\$\\{SAMTOOLS_DIR\\}")
                  (string-append "-I" (assoc-ref inputs "samtools")
                                 "/include/samtools"))
                 (("-lz ") "-lz -lpthread "))
               #t))
           ;; Copy the three built executables into the output's bin
           ;; directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (tool) (install-file tool bin))
                           '("j_count" "b_count" "sjcount"))
                 #t))))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
13331
;; minimap2 is built straight from its Makefile; there is no configure script
;; and no install target.
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "080w9066irkbhbyr4nmf19pzkdd2s4v31hpzlajgq2y0drr6zcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (list "CC=gcc"
             ;; Choose the extra make argument from the name of the system
             ;; we are building for.
             (let ((system ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" system)
                 "all")
                ((or (string-prefix? "armhf" system)
                     (string-prefix? "aarch64" system))
                 "arm_neon=1")
                ;; The fallback clause has to be spelled 'else': '_' is only
                ;; a wildcard in 'match' patterns, not in 'cond'.
                (else "sse2only=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable and its manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (install-file "minimap2" bin)
               (mkdir-p man)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate")
    (license license:expat)))
13390
;; ciRcus is fetched from the upstream Git tag "v<version>" and built with
;; the R build system.
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
13432
;; loomR is fetched from a pinned upstream Git commit and built with the R
;; build system.
(define-public r-loomr
  (let ((commit "df0144bd2bbceca6fadef9edc1bbc5ca672d4739")
        (revision "1"))
    (package
      (name "r-loomr")
      (version (git-version "0.2.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/mojaveazure/loomR.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1b1g4dlmfdyhn56bz1mkh9ymirri43wiz7rjhs7py3y7bdw1s3yr"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-r6" ,r-r6)
         ("r-hdf5r" ,r-hdf5r)
         ("r-iterators" ,r-iterators)
         ("r-itertools" ,r-itertools)
         ("r-matrix" ,r-matrix)))
      (home-page "https://github.com/mojaveazure/loomR")
      (synopsis "R interface for loom files")
      (description "This package provides an R interface to access, create,
and modify loom files. loomR aims to be completely compatible with loompy.")
      (license license:gpl3))))
13461
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place the gclib sources in the "gclib" directory that the
           ;; GCLDIR make flag points at.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; Only the version and commit are needed to describe this origin;
          ;; the previously bound 'revision' was never used.
          ,(let ((version "0.10.3")
                 (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib.git")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
13522
(define-public find-circ
  ;; The last release was in 2015. The license was clarified in 2017, so we
  ;; take the latest commit.
  (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d")
        (revision "1"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/marvin-jens/find_circ.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Copy each script to the output's bin directory and wrap it so
           ;; that it runs with the PYTHONPATH seen at build time.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin  (string-append (assoc-ref outputs "out") "/bin"))
                     (path (getenv "PYTHONPATH")))
                 (for-each (lambda (script)
                             (install-file script bin)
                             (wrap-program (string-append bin "/" script)
                               `("PYTHONPATH" ":" prefix (,path))))
                           '("cmp_bed.py"
                             "find_circ.py"
                             "maxlength.py"
                             "merge_bed.py"
                             "unmapped2anchors.py")))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
13574
;; Scanpy, fetched from PyPI and built with the Python build system.
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "1ak7bxms5a0yvf65prppq2g38clkv7c7jnjbnfpkh3xxv7q512jz"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-igraph" ,python-igraph)
       ("python-numba" ,python-numba)
       ("python-joblib" ,python-joblib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-statsmodels" ,python-statsmodels)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-h5py" ,python-h5py)
       ("python-tables" ,python-tables)))
    (home-page "https://github.com/theislab/scanpy")
    ;; A synopsis is not a sentence: no trailing period, no title case.
    (synopsis "Single-cell analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
13610
;; gffcompare is fetched from a pinned upstream Git commit and built from its
;; Makefile together with a copy of the gclib sources.
(define-public gffcompare
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Put the gclib sources next to the gffcompare source directory,
           ;; in "../gclib".
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           ;; Install the single executable by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
13673
;; intervaltree, fetched from PyPI and built with the Python build system.
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "2.1.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "intervaltree" version))
              (sha256
               (base32
                "02w191m9zxkcjqr1kv2slxvhymwhj3jnsyy3a28b837pi15q19dc"))))
    (build-system python-build-system)
    ;; FIXME: error when collecting tests
    (arguments '(#:tests? #f))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
13701
;; pypairix, fetched from PyPI and built with the Python build system.
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.6")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pypairix" version))
              (sha256
               (base32
                "0zs92b74s5v4xy2h16s15f3z6l4nnbw8x8zyif7xx5xpafjn0xss"))))
    (build-system python-build-system)
    ;; FIXME: the tests fail because test.support cannot be loaded:
    ;; ImportError: cannot import name 'support'
    (arguments '(#:tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
13725
;; pyfaidx, fetched from PyPI and built with the Python build system.
(define-public python-pyfaidx
  (package
    (name "python-pyfaidx")
    (version "0.5.4.2")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyfaidx" version))
              (sha256
               (base32
                "0y5zyjksj1rdglj601xd2bbni5abhdh622y3ck76chyzxz9z4rx8"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-setuptools" ,python-setuptools)
       ("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
13747
;; cooler, fetched from PyPI and built with the Python build system.
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.7.11")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "cooler" version))
              (sha256
               (base32
                "08k5nxnxa6qsbk15z5z0q01n28042k87wi4905hh95rzqib15mhx"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-numpydoc" ,python-numpydoc)
       ("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
13784
;; HiCExplorer, fetched from the upstream Git tag named after the version and
;; built with the Python build system.
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn the exact version pins in setup.py into lower bounds.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "http://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
13837
;; pyGenomeTracks, fetched from PyPI and built with the Python build system.
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyGenomeTracks" version))
              (sha256
               (base32
                "1fws6bqsyy9kj3qiabhkqx4wd4i775gsxnhszqd3zg7w67sc1ic5"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-configparser" ,python-configparser)
       ("python-future" ,python-future)
       ("python-hicexplorer" ,python-hicexplorer)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
13868
;; hic2cool, fetched from PyPI; its only run-time dependency here is cooler.
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hic2cool" version))
       (sha256
        (base32 "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
13892
;; poRe, an R package distributed through SourceForge rather than CRAN.
(define-public r-pore
  (package
    (name "r-pore")
    (version "0.24")
    (source
     (origin
       (method url-fetch)
       ;; The version appears both in the directory and in the file name.
       (uri (string-append "mirror://sourceforge/rpore/" version
                           "/poRe_" version ".tar.gz"))
       (sha256
        (base32 "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    ;; The upstream package name is mixed-case.
    (properties '((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bit64" ,r-bit64)
       ("r-data-table" ,r-data-table)
       ("r-rhdf5" ,r-rhdf5)
       ("r-shiny" ,r-shiny)
       ("r-svdialogs" ,r-svdialogs)))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; This is free software but the license variant is unclear:
    ;; <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
13921
;; xbioc is built from a pinned git commit; the version string is derived
;; from the base version, the revision and the commit.
(define-public r-xbioc
  (let ((commit "f798c187e376fd1ba27abd559f47bbae7e3e466b")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "03hffh2f6z71y6l6dqpa5cql3hdaw7zigdi8sm2dzgx379k9rgrr"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-assertthat" ,r-assertthat)
         ("r-biobase" ,r-biobase)
         ("r-biocinstaller" ,r-biocinstaller)
         ("r-digest" ,r-digest)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-reshape2" ,r-reshape2)
         ("r-stringr" ,r-stringr)))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description
       "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
13954
;; csSAM is built from a pinned git commit of the upstream repository.
(define-public r-cssam
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-formula" ,r-formula)
         ("r-ggplot2" ,r-ggplot2)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description
       "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Any version
      (license license:lgpl2.1+))))
13985
;; BSeq-sc is built from a pinned git commit; it propagates both r-cssam and
;; r-xbioc among its R dependencies.
(define-public r-bseqsc
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-abind" ,r-abind)
         ("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-cssam" ,r-cssam)
         ("r-dplyr" ,r-dplyr)
         ("r-e1071" ,r-e1071)
         ("r-edger" ,r-edger)
         ("r-ggplot2" ,r-ggplot2)
         ("r-nmf" ,r-nmf)
         ("r-openxlsx" ,r-openxlsx)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-preprocesscore" ,r-preprocesscore)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)
         ("r-stringr" ,r-stringr)
         ("r-xbioc" ,r-xbioc)))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
      (description
       "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
14030
(define-public porechop
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; <https://github.com/rrwick/Porechop#installation>, so a commit is
  ;; pinned here.
  (let ((revision "1")
        (commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
14060
(define-public poretools
  ;; The latest release was in 2016 and the latest commit is from 2017.
  ;; The recommended way to install is to clone the git repository, see
  ;; <https://poretools.readthedocs.io/en/latest/content/installation.html>.
  (let ((revision "1")
        (commit "e426b1f09e86ac259a00c261c79df91510777407"))
    (package
      (name "poretools")
      (version (git-version "0.6.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/poretools.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
      (build-system python-build-system)
      ;; Requires Python >= 2.7 and < 3.0; the Python dependencies below are
      ;; therefore the python2 variants as well.
      (arguments
       `(#:python ,python-2))
      (inputs
       `(("hdf5" ,hdf5)))
      (propagated-inputs
       `(("python-dateutil" ,python2-dateutil)
         ("python-h5py" ,python2-h5py)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-seaborn" ,python2-seaborn)))
      (home-page "https://poretools.readthedocs.io")
      (synopsis "Toolkit for working with nanopore sequencing data")
      (description
       "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
This @code{poretools} package is a flexible toolkit for exploring datasets
generated by nanopore sequencing devices for the purposes of quality control and
downstream analysis. Poretools operates directly on the native FAST5, a variant
of the Hierarchical Data Format (HDF5) standard.")
      (license license:expat))))
14099
;; AbsFilterGSEA, fetched from CRAN under its mixed-case upstream name.
(define-public r-absfiltergsea
  (package
    (name "r-absfiltergsea")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "AbsFilterGSEA" version))
       (sha256
        (base32
         "15srxkxsvn38kd5frdrwfdf0ad8gskrd0h01wmdf9hglq8fjrp7w"))))
    (properties '((upstream-name . "AbsFilterGSEA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-deseq" ,r-deseq)
       ("r-limma" ,r-limma)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)))
    (home-page "https://cran.r-project.org/web/packages/AbsFilterGSEA/")
    (synopsis "Improved false positive control of gene-permuting with absolute filtering")
    (description
     "This package provides a function that performs gene-permuting of a gene-set
enrichment analysis (GSEA) calculation with or without the absolute filtering.
Without filtering, users can perform (original) two-tailed or one-tailed
absolute GSEA.")
    (license license:gpl2)))
14126
;; JAMM consists of two shell entry points plus R and Perl helper scripts.
;; Nothing is compiled: the custom install phase copies the files into place
;; and wraps the entry points so they find their interpreters and libraries.
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM.git")
             ;; The tag fetched is "JAMMv" followed by the version.
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0ls889jcma1ch9h21jjhnkadgszgqj41842hhcjh6cg88f85qf3i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin-dir (string-append out "/bin"))
                    (helper-dir (string-append out "/libexec/jamm"))
                    (entry-points '("JAMM.sh" "SignalGenerator.sh"))
                    ;; "bin" directories of the tools the entry points call.
                    (tool-dirs
                     (map (lambda (label)
                            (string-append (assoc-ref inputs label) "/bin"))
                          '("coreutils" "gawk" "perl" "r-minimal"))))
               ;; Point the entry points at the installed helper scripts.
               (substitute* entry-points
                 (("^sPath=.*")
                  (string-append "sPath=\"" helper-dir "\"\n")))
               (for-each (lambda (helper)
                           (install-file helper helper-dir))
                         '("bincalculator.r"
                           "peakfinder.r"
                           "peakhelper.r"
                           "signalmaker.r"
                           "xcorr.r"
                           "xcorrhelper.r"
                           ;; Perl scripts
                           "peakfilter.pl"
                           "readshifter.pl"))
               (for-each
                (lambda (script)
                  (chmod script #o555)
                  (install-file script bin-dir)
                  (wrap-program (string-append bin-dir "/" script)
                    (list "PATH" ":" 'prefix tool-dirs)
                    (list "PERL5LIB" ":" 'prefix
                          (list (getenv "PERL5LIB")))
                    (list "R_LIBS_SITE" ":" 'prefix
                          (list (getenv "R_LIBS_SITE")))))
                entry-points))
             #t)))))
    (inputs
     `(("bash" ,bash)
       ("coreutils" ,coreutils)
       ("gawk" ,gawk)
       ("perl" ,perl)
       ("r-minimal" ,r-minimal)
       ;;("r-parallel" ,r-parallel)
       ("r-signal" ,r-signal)
       ("r-mclust" ,r-mclust)))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
14200
;; NGLess is a Haskell program.  Its Cabal file is generated with hpack after
;; unpacking, and after installation the external tools it uses are linked
;; into its "bin" directory under version-specific names.
(define-public ngless
  (package
    (name "ngless")
    (version "0.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/ngless/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mc2gi7h4lx74zylvyp76mvc0w6706j858ii9vlgzqsw6acpr117"))))
    (build-system haskell-build-system)
    (arguments
     ;; The haddock phase fails with:
     ;;   NGLess/CmdArgs.hs:20:1: error: parse error on input import
     ;;   import Options.Applicative
     `(#:haddock? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'create-cabal-file
           (lambda _ (invoke "hpack") #t))
         ;; These tools are expected to be installed alongside ngless.
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               ;; Each tool's input label equals its executable name, so one
               ;; loop creates "ngless-VERSION-TOOL" for every tool.
               (for-each
                (lambda (tool)
                  (symlink (string-append (assoc-ref inputs tool)
                                          "/bin/" tool)
                           (string-append bin "ngless-" ,version "-" tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))
               #t))))))
    (inputs
     `(("prodigal" ,prodigal)
       ("bwa" ,bwa)
       ("samtools" ,samtools)
       ("minimap2" ,minimap2)
       ("ghc-aeson" ,ghc-aeson)
       ("ghc-ansi-terminal" ,ghc-ansi-terminal)
       ("ghc-async" ,ghc-async)
       ("ghc-atomic-write" ,ghc-atomic-write)
       ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
       ("ghc-chart" ,ghc-chart)
       ("ghc-chart-cairo" ,ghc-chart-cairo)
       ("ghc-conduit" ,ghc-conduit)
       ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
       ("ghc-conduit-combinators" ,ghc-conduit-combinators)
       ("ghc-conduit-extra" ,ghc-conduit-extra)
       ("ghc-configurator" ,ghc-configurator)
       ("ghc-convertible" ,ghc-convertible)
       ("ghc-data-default" ,ghc-data-default)
       ("ghc-double-conversion" ,ghc-double-conversion)
       ("ghc-edit-distance" ,ghc-edit-distance)
       ("ghc-either" ,ghc-either)
       ("ghc-errors" ,ghc-errors)
       ("ghc-extra" ,ghc-extra)
       ("ghc-filemanip" ,ghc-filemanip)
       ("ghc-file-embed" ,ghc-file-embed)
       ("ghc-gitrev" ,ghc-gitrev)
       ("ghc-hashtables" ,ghc-hashtables)
       ("ghc-http-conduit" ,ghc-http-conduit)
       ("ghc-inline-c" ,ghc-inline-c)
       ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
       ("ghc-intervalmap" ,ghc-intervalmap)
       ("ghc-missingh" ,ghc-missingh)
       ("ghc-optparse-applicative" ,ghc-optparse-applicative)
       ("ghc-parsec" ,ghc-parsec)
       ("ghc-regex" ,ghc-regex)
       ("ghc-safe" ,ghc-safe)
       ("ghc-safeio" ,ghc-safeio)
       ("ghc-strict" ,ghc-strict)
       ("ghc-tar" ,ghc-tar)
       ("ghc-text" ,ghc-text)
       ("ghc-unliftio" ,ghc-unliftio)
       ("ghc-unliftio-core" ,ghc-unliftio-core)
       ("ghc-vector" ,ghc-vector)
       ("ghc-yaml" ,ghc-yaml)
       ("ghc-zlib" ,ghc-zlib)))
    (propagated-inputs
     `(("r-r6" ,r-r6)
       ("r-hdf5r" ,r-hdf5r)
       ("r-iterators" ,r-iterators)
       ("r-itertools" ,r-itertools)
       ("r-matrix" ,r-matrix)))
    (native-inputs
     `(("ghc-hpack" ,ghc-hpack)
       ("ghc-quickcheck" ,ghc-quickcheck)
       ("ghc-test-framework" ,ghc-test-framework)
       ("ghc-test-framework-hunit" ,ghc-test-framework-hunit)
       ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
       ("ghc-test-framework-th" ,ghc-test-framework-th)))
    (home-page "https://gitlab.com/ngless/ngless")
    (synopsis "DSL for processing next-generation sequencing data")
    (description
     "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
14305
(define-public filtlong
  ;; The recommended way to install is to clone the git repository, see
  ;; <https://github.com/rrwick/Filtlong#installation>, and the latest
  ;; release is more than nine months old.
  (let ((revision "1")
        (commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Install the executable and the two helper scripts by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (script-dir (string-append out
                                                 "/share/filtlong/scripts")))
                 (install-file "bin/filtlong" (string-append out "/bin"))
                 (for-each (lambda (script)
                             (install-file script script-dir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))
               #t))
           ;; Give the installed histogram.py the build-time PYTHONPATH.
           (add-after 'install 'wrap-program
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((histogram
                      (string-append (assoc-ref outputs "out")
                                     "/share/filtlong/scripts/histogram.py"))
                     (python-path (getenv "PYTHONPATH")))
                 (wrap-program histogram
                   `("PYTHONPATH" ":" prefix (,python-path))))
               #t))
           ;; Replace "awk" in the shell script with the full file name of
           ;; gawk as found by `which'.
           (add-before 'check 'patch-tests
             (lambda _
               (substitute* "scripts/read_info_histograms.sh"
                 (("awk") (which "gawk")))
               #t)))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0))))) ;histogram.py
14365
(define-public nanopolish
  ;; The recommended way to install is to clone the git repository
  ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  ;; Also, the differences between release and current version seem to be
  ;; significant.
  (let ((commit "50e8b5cc62f9b46f5445f5c5e8c5ab7263ea6d9d")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.10.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish.git")
               (commit commit)
               ;; Fetch git submodules along with the main repository.
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "09j5gz57yr9i34a27vbl72i4g8syv2zzgmsfyjq02yshmnrvkjs6"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Put the "eigen3" include directory of the eigen input on CPATH.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "CPATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3"))
               #t))
           (delete 'configure)
           ;; Install the executable and everything found under "scripts".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (string-append out "/share/nanopolish/scripts")))

                 (install-file "nanopolish" bin)
                 (for-each (lambda (file) (install-file file scripts))
                           (find-files "scripts" ".*"))
                 #t)))
           ;; Wrap the installed Python and Perl scripts so that they see
           ;; the build-time PYTHONPATH and PERL5LIB, respectively.  The
           ;; scripts are looked up below the package output (not at the
           ;; file system root), and the search path values are read from
           ;; the environment.  When a variable is unset there is nothing
           ;; to prefix, so the matching scripts are left unwrapped.
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((scripts (string-append (assoc-ref outputs "out")
                                              "/share/nanopolish/scripts"))
                      (wrap-matching
                       (lambda (variable pattern)
                         (let ((value (getenv variable)))
                           (when value
                             (for-each
                              (lambda (file)
                                (wrap-program file
                                  (list variable ":" 'prefix (list value))))
                              (find-files scripts pattern)))))))
                 (wrap-matching "PYTHONPATH" "\\.py$")
                 (wrap-matching "PERL5LIB" "\\.pl$")
                 #t))))))
      (inputs
       `(("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("python" ,python)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))