gnu: Add confusion-mdl.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
CommitLineData
4e10a221 1;;; GNU Guix --- Functional package management for GNU
c3b2ab9d 2;;; Copyright © 2014, 2015, 2016, 2017, 2018 Ricardo Wurmus <rekado@elephly.net>
7502badb 3;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
8921841d 4;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
a5002ae7 5;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
a0a71439 6;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
3df57b3a 7;;; Copyright © 2016, 2017, 2018 Efraim Flashner <efraim@flashner.co.il>
318c0aee 8;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
658ab21d 9;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
ddf38ece 10;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
9364a520 11;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
32cbbac1 12;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
d60772dc 13;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
4f9355c3 14;;; Copyright © 2018 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
4e10a221
RW
15;;;
16;;; This file is part of GNU Guix.
17;;;
18;;; GNU Guix is free software; you can redistribute it and/or modify it
19;;; under the terms of the GNU General Public License as published by
20;;; the Free Software Foundation; either version 3 of the License, or (at
21;;; your option) any later version.
22;;;
23;;; GNU Guix is distributed in the hope that it will be useful, but
24;;; WITHOUT ANY WARRANTY; without even the implied warranty of
25;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26;;; GNU General Public License for more details.
27;;;
28;;; You should have received a copy of the GNU General Public License
29;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
30
31(define-module (gnu packages bioinformatics)
32 #:use-module ((guix licenses) #:prefix license:)
33 #:use-module (guix packages)
8e913213 34 #:use-module (guix utils)
4e10a221 35 #:use-module (guix download)
2c16316e 36 #:use-module (guix git-download)
ec946638 37 #:use-module (guix hg-download)
10b4a969 38 #:use-module (guix build-system ant)
4e10a221 39 #:use-module (guix build-system gnu)
d7678942 40 #:use-module (guix build-system cmake)
c033f5d6 41 #:use-module (guix build-system ocaml)
365c8153 42 #:use-module (guix build-system perl)
8622a072 43 #:use-module (guix build-system python)
a5002ae7 44 #:use-module (guix build-system r)
9c38b540 45 #:use-module (guix build-system ruby)
9364a520 46 #:use-module (guix build-system scons)
d3517eda 47 #:use-module (guix build-system trivial)
4e10a221 48 #:use-module (gnu packages)
a2950fa4 49 #:use-module (gnu packages autotools)
684bf7c7 50 #:use-module (gnu packages algebra)
d3517eda 51 #:use-module (gnu packages base)
318c0aee 52 #:use-module (gnu packages bash)
a0a71439 53 #:use-module (gnu packages bison)
d53aeeaf 54 #:use-module (gnu packages bioconductor)
e4e5a4d8 55 #:use-module (gnu packages boost)
ac257f12 56 #:use-module (gnu packages check)
4e10a221 57 #:use-module (gnu packages compression)
82c370de 58 #:use-module (gnu packages cpio)
7cb61550 59 #:use-module (gnu packages cran)
1baee943 60 #:use-module (gnu packages curl)
99828fa7 61 #:use-module (gnu packages documentation)
94820951 62 #:use-module (gnu packages databases)
d29150b5 63 #:use-module (gnu packages datastructures)
75dd2424 64 #:use-module (gnu packages file)
99268755 65 #:use-module (gnu packages flex)
02f35bb5 66 #:use-module (gnu packages gawk)
2409f37f 67 #:use-module (gnu packages gcc)
66e40e00 68 #:use-module (gnu packages gd)
97b9da68 69 #:use-module (gnu packages gtk)
b16728b0 70 #:use-module (gnu packages glib)
18f5d2a7 71 #:use-module (gnu packages graph)
db7a3444 72 #:use-module (gnu packages groff)
50937297 73 #:use-module (gnu packages guile)
89984be4 74 #:use-module (gnu packages haskell)
66e40e00 75 #:use-module (gnu packages image)
97b9da68 76 #:use-module (gnu packages imagemagick)
15a3c3d4 77 #:use-module (gnu packages java)
8d77a085 78 #:use-module (gnu packages jemalloc)
875d0681 79 #:use-module (gnu packages dlang)
51c64999 80 #:use-module (gnu packages linux)
ec946638 81 #:use-module (gnu packages logging)
36742f43 82 #:use-module (gnu packages machine-learning)
db7a3444 83 #:use-module (gnu packages man)
c833ab55 84 #:use-module (gnu packages maths)
6c2b26e2 85 #:use-module (gnu packages mpi)
4e10a221 86 #:use-module (gnu packages ncurses)
c033f5d6 87 #:use-module (gnu packages ocaml)
81f3e0c1 88 #:use-module (gnu packages pcre)
ceb62d54 89 #:use-module (gnu packages parallel)
66e40e00 90 #:use-module (gnu packages pdf)
4e10a221 91 #:use-module (gnu packages perl)
5ccde207 92 #:use-module (gnu packages perl-check)
4e10a221 93 #:use-module (gnu packages pkg-config)
bfe3c685 94 #:use-module (gnu packages popt)
e4e5a4d8 95 #:use-module (gnu packages protobuf)
346a829a 96 #:use-module (gnu packages python)
589e3f4e 97 #:use-module (gnu packages python-web)
ec946638 98 #:use-module (gnu packages readline)
9c38b540 99 #:use-module (gnu packages ruby)
84be3b99 100 #:use-module (gnu packages serialization)
94820951 101 #:use-module (gnu packages shells)
c833ab55 102 #:use-module (gnu packages statistics)
aa163424 103 #:use-module (gnu packages swig)
d7678942 104 #:use-module (gnu packages tbb)
97b9da68 105 #:use-module (gnu packages tex)
db7a3444 106 #:use-module (gnu packages texinfo)
2127cedb 107 #:use-module (gnu packages textutils)
43c565d2 108 #:use-module (gnu packages time)
a2950fa4 109 #:use-module (gnu packages tls)
ce7155d5 110 #:use-module (gnu packages vim)
365c8153 111 #:use-module (gnu packages web)
c833ab55 112 #:use-module (gnu packages xml)
66e40e00 113 #:use-module (gnu packages xorg)
2c9232ae 114 #:use-module (srfi srfi-1)
ce7e361f 115 #:use-module (ice-9 match))
4e10a221 116
8dc797fa
BW
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32
         "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; The build below compiles one C file directly with gcc, so there
         ;; is nothing to configure.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The source file name embeds the package version.
             (let ((source-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc"
                               "-O3"
                               "-ffast-math"
                               "-finline-functions"
                               "-o"
                               "aragorn"
                               source-file)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install the binary and its man page by hand.
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (mandir (string-append prefix "/share/man/man1")))
               (mkdir-p bindir)
               (install-file "aragorn" bindir)
               (mkdir-p mandir)
               (install-file "aragorn.1" mandir))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
162
a12ba6e8
BW
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method url-fetch)
              ;; BamM is not available on pypi.
              (uri (string-append
                    "https://github.com/Ecogenomics/BamM/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (substitute* "configure" (("/bin/sh") sh))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Wrap the installed script so it sees the build-time PATH.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (setenv "PATH"
                       (string-append out "/bin:" (getenv "PATH")))
               ;; Compute the "X.Y" site-packages directory from the Python
               ;; package's version instead of slicing a fixed number of
               ;; characters off its store file name; the latter only works
               ;; while the patch level has a single digit.
               (setenv "PYTHONPATH"
                       (string-append
                        out
                        "/lib/python"
                        ,(version-major+minor (package-version python-2))
                        "/site-packages:"
                        (getenv "PYTHONPATH")))
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (zero? (system* "nosetests"))))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3) ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
254
9794180d
RW
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1z3kg24qrwq13a88n9d86izngrar4fll7gr6phddb2faw75pdgaa"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Add the output's lib/bamtools directory to the RPATH through
             ;; LDFLAGS.
             (setenv "LDFLAGS"
                     (string-append
                      "-Wl,-rpath="
                      (assoc-ref outputs "out") "/lib/bamtools"))
             ;; 'setenv' returns an unspecified value; phases are expected
             ;; to return #t on success.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
287
bdc7be59
MB
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/bcftools/releases/download/"
             version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32
         "1vgw2mwngq20c530zim52zvgmw1lci8rzl33pvh44xqk3xlzvjsa"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.8")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags (list "--with-htslib=system")
       #:make-flags
       ;; Every HTS-related make variable points into the same htslib input,
       ;; so look its directory up once.
       (let ((htslib-dir (assoc-ref %build-inputs "htslib")))
         (list
          "USE_GPL=1"
          "LIBS=-lgsl -lgslcblas"
          (string-append "prefix=" (assoc-ref %outputs "out"))
          (string-append "HTSDIR=" htslib-dir "/include")
          (string-append "HTSLIB=" htslib-dir "/lib/libhts.so")
          (string-append "BGZIP=" htslib-dir "/bin/bgzip")
          (string-append "TABIX=" htslib-dir "/bin/tabix")
          (string-append "PACKAGE_VERSION=" ,version)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; Replace the hard-coded /bin/bash in the test driver with the
             ;; bash found on PATH.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
340
8dd4ff11
RW
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.33")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/bedops/bedops/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0kx4awrwby8f33wqyx8w7ms7v25xhf0d421csgf96a3hfzn2mb0m"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.
             (with-directory-excursion "third-party"
               (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                    (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                    (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             ;; Point the bundled zlib's Makefile at "bash" as its SHELL.
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
399
81de5647
RW
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.27.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32
         "1ndg5yknrxl4djx8ddzgk12rrbiidfpmkkg5z3f95jzryfxarhn8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The configure phase is dropped; the installation prefix is
         ;; passed to make directly below.
         (delete 'configure))
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
434
9a8f309c
RW
435;; Later releases of bedtools produce files with more columns than
436;; what Ribotaper expects.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every file found under the build tree's "bin"
             ;; directory into the output's bin directory.
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "bin" ".*")))
             #t)))))))
9a8f309c 461
17dc32a4
RW
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                                  "files/RiboTaper/RiboTaper_Version_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Wrap each installed script so that R_LIBS_SITE is set to the
             ;; value it has at build time.
             (let* ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             ;; 'for-each' returns an unspecified value; phases are expected
             ;; to return #t on success.
             #t)))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
505
769fc6bb
RW
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ratschlab/RiboDiff/"
                                  "archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wpbwmfv05wdjxv7ikm664f7s7p7cqr8jnw99zrda0q67rl50aaj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Keep the matched "packages=" line and append a "scripts="
             ;; line that reuses the text captured before "packages=".
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole-line leading)
                (string-append whole-line "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (home-page "http://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
549
a0a71439
RJ
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (mkdir-p man)
               ;; The man page is shipped as "awk.1"; install it under the
               ;; program's own name.
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin))
             ;; 'install-file' returns an unspecified value; phases are
             ;; expected to return #t on success.
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
590
a2fb1492
RW
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    ;; bedtools and samtools are propagated so they are installed alongside
    ;; this package.
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
622
9e12eba8
BW
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.6")
    (source
     (origin
       (method url-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "08cr7wpahk6zb31h4bs7jmzpvxcqv9s13xz40h6y2h656jvdvnpj"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The Python 2 variant is a separate definition, referenced lazily.
    (properties `((python2-variant . ,(delay python2-biom-format))))))
657
(define-public python2-biom-format
  ;; Python 2 variant of python-biom-format, with one extra phase that edits
  ;; setup.py.
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-base)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ;; Keep whatever arguments the converted base package carries.
         ,@(package-arguments py2-base))))))
9e12eba8 672
f7283db3
RW
(define-public bioperl-minimal
  ;; RUNTIME-INPUTS is used both as the package's inputs and to compute, on
  ;; the host side, the names of all transitive target inputs.  Those names
  ;; are spliced into the 'wrap-programs' phase below.
  (let* ((runtime-inputs `(("perl-module-build" ,perl-module-build)
                           ("perl-data-stag" ,perl-data-stag)
                           ("perl-libwww" ,perl-libwww)
                           ("perl-uri" ,perl-uri)))
         (transitive-names
          (map (lambda (input)
                 (package-name (cadr input)))
               (delete-duplicates
                (append-map (lambda (input)
                              (package-transitive-target-inputs (cadr input)))
                            runtime-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; The release tag uses dashes where the version has dots.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (char)
                                           (if (char=? char #\.)
                                               #\-
                                               char))
                                         version)
                             ".tar.gz"))
         (sha256
          (base32
           "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin/"))
                      (site-perl (string-append prefix "/lib/perl5/site_perl"))
                      (input-dirs (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',transitive-names))
                      (perl5lib (string-join (cons site-perl input-dirs)
                                             ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files bindir "\\.pl$"))
                 #t))))))
      (inputs runtime-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
f7283db3 737
85c37e29
RW
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32
         "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))

;; Python 2 variant, derived mechanically from the package above.
(define-public python2-biopython
  (package-with-python2 python-biopython))
85c37e29 771
5d5a02dd
BW
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "fastalite" version))
              (sha256
               (base32
                "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    ;; Test data is not distributed.
    (arguments `(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))

;; Python 2 variant, derived mechanically from the package above.
(define-public python2-fastalite
  (package-with-python2 python-fastalite))
794
985d8411
BW
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.0-1." (substring commit 0 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of the
      ;; bpp packages with GCC 5.
      (inputs
       `(("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
827
8b5f4d57
BW
;; Bio++ phylogenetics library, built with cmake-build-system from a pinned
;; git commit.  It depends on bpp-core and bpp-seq, is built in the source
;; tree (#:out-of-source? #f) so the tests can find their data, and uses
;; GCC 5 as the compiler input.
828(define-public bpp-phyl
829 ;; The last release was in 2014 and the recommended way to install from source
830 ;; is to clone the git repository, so we do this.
831 ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
832 (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
833 (package
834 (name "bpp-phyl")
835 (version (string-append "2.2.0-1." (string-take commit 7)))
836 (source (origin
837 (method git-fetch)
838 (uri (git-reference
839 (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
840 (commit commit)))
841 (file-name (string-append name "-" version "-checkout"))
842 (sha256
843 (base32
844 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
845 (build-system cmake-build-system)
846 (arguments
847 `(#:parallel-build? #f
848 ;; If out-of-source, test data is not copied into the build directory
849 ;; so the tests fail.
850 #:out-of-source? #f))
851 (inputs
852 `(("bpp-core" ,bpp-core)
853 ("bpp-seq" ,bpp-seq)
854 ;; GCC 4.8 fails due to an 'internal compiler error', so we use a more
855 ;; modern GCC.
856 ("gcc" ,gcc-5)))
857 (home-page "http://biopp.univ-montp2.fr")
858 (synopsis "Bio++ phylogenetic Library")
859 (description
860 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
861analysis, phylogenetics, molecular evolution and population genetics. This
862library provides phylogenetics-related modules.")
863 (license license:cecill-c))))
864
159a7016
BW
;; Bio++ population genetics library, built with cmake-build-system from a
;; pinned git commit.  It depends on bpp-core and bpp-seq; tests are disabled
;; because the project ships none.
865(define-public bpp-popgen
866 ;; The last release was in 2014 and the recommended way to install from source
867 ;; is to clone the git repository, so we do this.
868 ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
869 (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
870 (package
871 (name "bpp-popgen")
872 (version (string-append "2.2.0-1." (string-take commit 7)))
873 (source (origin
874 (method git-fetch)
875 (uri (git-reference
876 (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
877 (commit commit)))
878 (file-name (string-append name "-" version "-checkout"))
879 (sha256
880 (base32
881 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
882 (build-system cmake-build-system)
883 (arguments
884 `(#:parallel-build? #f
885 #:tests? #f)) ; There are no tests.
886 (inputs
887 `(("bpp-core" ,bpp-core)
888 ("bpp-seq" ,bpp-seq)
889 ("gcc" ,gcc-5)))
890 (home-page "http://biopp.univ-montp2.fr")
891 (synopsis "Bio++ population genetics library")
892 (description
893 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
894analysis, phylogenetics, molecular evolution and population genetics. This
895library provides population genetics-related modules.")
896 (license license:cecill-c))))
897
70f1bc05
BW
;; Bio++ sequence library, built with cmake-build-system from a pinned git
;; commit.  It depends on bpp-core and is built in the source tree
;; (#:out-of-source? #f) so the tests can find their data.
898(define-public bpp-seq
899 ;; The last release was in 2014 and the recommended way to install from source
900 ;; is to clone the git repository, so we do this.
901 ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
902 (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
903 (package
904 (name "bpp-seq")
905 (version (string-append "2.2.0-1." (string-take commit 7)))
906 (source (origin
907 (method git-fetch)
908 (uri (git-reference
909 (url "http://biopp.univ-montp2.fr/git/bpp-seq")
910 (commit commit)))
911 (file-name (string-append name "-" version "-checkout"))
912 (sha256
913 (base32
914 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
915 (build-system cmake-build-system)
916 (arguments
917 `(#:parallel-build? #f
918 ;; If out-of-source, test data is not copied into the build directory
919 ;; so the tests fail.
920 #:out-of-source? #f))
921 (inputs
922 `(("bpp-core" ,bpp-core)
923 ("gcc" ,gcc-5))) ; Use GCC 5 as per 'bpp-core'.
924 (home-page "http://biopp.univ-montp2.fr")
925 (synopsis "Bio++ sequence library")
926 (description
927 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
928analysis, phylogenetics, molecular evolution and population genetics. This
929library provides sequence-related modules.")
930 (license license:cecill-c))))
931
db7a3444
BW
;; Bio++ command line tools, built with cmake-build-system from a pinned git
;; commit.  It depends on all four Bio++ libraries defined in this file
;; (bpp-core, bpp-seq, bpp-phyl, bpp-popgen); groff, man-db and texinfo are
;; build-time-only inputs.  Tests are disabled because the project ships none.
932(define-public bppsuite
933 ;; The last release was in 2014 and the recommended way to install from source
934 ;; is to clone the git repository, so we do this.
935 ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
936 (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
937 (package
938 (name "bppsuite")
939 (version (string-append "2.2.0-1." (string-take commit 7)))
940 (source (origin
941 (method git-fetch)
942 (uri (git-reference
943 (url "http://biopp.univ-montp2.fr/git/bppsuite")
944 (commit commit)))
945 (file-name (string-append name "-" version "-checkout"))
946 (sha256
947 (base32
948 "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
949 (build-system cmake-build-system)
950 (arguments
951 `(#:parallel-build? #f
952 #:tests? #f)) ; There are no tests.
953 (native-inputs
954 `(("groff" ,groff)
955 ("man-db" ,man-db)
956 ("texinfo" ,texinfo)))
957 (inputs
958 `(("bpp-core" ,bpp-core)
959 ("bpp-seq" ,bpp-seq)
960 ("bpp-phyl" ,bpp-phyl)
;; Input labels must be unique: this entry used to reuse the "bpp-phyl"
;; label, which made bpp-popgen unreachable by label.
961 ("bpp-popgen" ,bpp-popgen)
962 ("gcc" ,gcc-5)))
963 (home-page "http://biopp.univ-montp2.fr")
964 (synopsis "Bioinformatics tools written with the Bio++ libraries")
965 (description
966 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
967analysis, phylogenetics, molecular evolution and population genetics. This
968package provides command line tools using the Bio++ library.")
969 (license license:cecill-c))))
970
82c370de
RW
;; NCBI BLAST+ 2.6.0, built from the upstream source tarball with
;; gnu-build-system.  The origin snippet deletes the bundled bzip2, zlib and
;; regexp sources; the replacement configure phase points the build at the
;; bzip2, zlib and pcre inputs instead.  The result is split into the "out",
;; "lib" and "include" outputs.  Tests and parallel builds are disabled.
971(define-public blast+
972 (package
973 (name "blast+")
4732e6ee 974 (version "2.6.0")
82c370de
RW
975 (source (origin
976 (method url-fetch)
977 (uri (string-append
978 "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
979 version "/ncbi-blast-" version "+-src.tar.gz"))
980 (sha256
981 (base32
4732e6ee
BW
982 "15n937pw5aqmyfjb6l387d18grqbb96l63d5xj4l7yyh0zbf2405"))
983 (patches (search-patches "blast+-fix-makefile.patch"))
82c370de
RW
984 (modules '((guix build utils)))
985 (snippet
986 '(begin
4732e6ee 987 ;; Remove bundled bzip2, zlib and pcre.
82c370de
RW
988 (delete-file-recursively "c++/src/util/compress/bzip2")
989 (delete-file-recursively "c++/src/util/compress/zlib")
4732e6ee 990 (delete-file-recursively "c++/src/util/regexp")
82c370de
RW
991 (substitute* "c++/src/util/compress/Makefile.in"
992 (("bzip2 zlib api") "api"))
993 ;; Remove useless msbuild directory
994 (delete-file-recursively
995 "c++/src/build-system/project_tree_builder/msbuild")
996 #t))))
997 (build-system gnu-build-system)
998 (arguments
4732e6ee 999 `(;; There are two(!) tests for this massive library, and both fail with
82c370de 1000 ;; "unparsable timing stats".
82c370de
RW
1001 ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
1002 ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
1003 #:tests? #f
1004 #:out-of-source? #t
1005 #:parallel-build? #f ; not supported
1006 #:phases
1007 (modify-phases %standard-phases
1008 (add-before
1009 'configure 'set-HOME
1010 ;; $HOME needs to be set at some point during the configure phase
1011 (lambda _ (setenv "HOME" "/tmp") #t))
;; Everything after unpacking is driven from the "c++" subdirectory.
1012 (add-after
1013 'unpack 'enter-dir
1014 (lambda _ (chdir "c++") #t))
1015 (add-after
1016 'enter-dir 'fix-build-system
1017 (lambda _
;; Map a bare command name to its replacement text: "date" is
;; special-cased, other commands are looked up with 'which', and a
;; command that cannot be found logs a warning and yields #f so that
;; the caller below keeps the original text.
1018 (define (which* cmd)
1019 (cond ((string=? cmd "date")
1020 ;; make call to "date" deterministic
1021 "date -d @0")
1022 ((which cmd)
1023 => identity)
1024 (else
1025 (format (current-error-port)
1026 "WARNING: Unable to find absolute path for ~s~%"
1027 cmd)
1028 #f)))
1029
1030 ;; Rewrite hardcoded paths to various tools
1031 (substitute* (append '("src/build-system/configure.ac"
1032 "src/build-system/configure"
4732e6ee 1033 "src/build-system/helpers/run_with_lock.c"
82c370de
RW
1034 "scripts/common/impl/if_diff.sh"
1035 "scripts/common/impl/run_with_lock.sh"
1036 "src/build-system/Makefile.configurables.real"
1037 "src/build-system/Makefile.in.top"
1038 "src/build-system/Makefile.meta.gmake=no"
1039 "src/build-system/Makefile.meta.in"
1040 "src/build-system/Makefile.meta_l"
1041 "src/build-system/Makefile.meta_p"
1042 "src/build-system/Makefile.meta_r"
1043 "src/build-system/Makefile.mk.in"
1044 "src/build-system/Makefile.requirements"
1045 "src/build-system/Makefile.rules_with_autodep.in")
1046 (find-files "scripts/common/check" "\\.sh$"))
1047 (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
1048 (or (which* cmd) all)))
1049
1050 (substitute* (find-files "src/build-system" "^config.*")
1051 (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
1052 (("^PATH=.*") ""))
1053
1054 ;; rewrite "/var/tmp" in check script
1055 (substitute* "scripts/common/check/check_make_unix.sh"
1056 (("/var/tmp") "/tmp"))
1057
1058 ;; do not reset PATH
1059 (substitute* (find-files "scripts/common/impl/" "\\.sh$")
1060 (("^ *PATH=.*") "")
1061 (("action=/bin/") "action=")
1062 (("export PATH") ":"))
1063 #t))
1064 (replace
1065 'configure
1066 (lambda* (#:key inputs outputs #:allow-other-keys)
1067 (let ((out (assoc-ref outputs "out"))
1068 (lib (string-append (assoc-ref outputs "lib") "/lib"))
1069 (include (string-append (assoc-ref outputs "include")
1070 "/include/ncbi-tools++")))
1071 ;; The 'configure' script doesn't recognize things like
1072 ;; '--enable-fast-install'.
1073 (zero? (system* "./configure.orig"
1074 (string-append "--with-build-root=" (getcwd) "/build")
1075 (string-append "--prefix=" out)
1076 (string-append "--libdir=" lib)
1077 (string-append "--includedir=" include)
1078 (string-append "--with-bz2="
1079 (assoc-ref inputs "bzip2"))
1080 (string-append "--with-z="
1081 (assoc-ref inputs "zlib"))
4732e6ee
BW
1082 (string-append "--with-pcre="
1083 (assoc-ref inputs "pcre"))
82c370de
RW
1084 ;; Each library is built twice by default, once
1085 ;; with "-static" in its name, and again
1086 ;; without.
1087 "--without-static"
1088 "--with-dll"))))))))
4732e6ee
BW
1089 (outputs '("out" ; 21 MB
1090 "lib" ; 226 MB
1091 "include")) ; 33 MB
82c370de
RW
1092 (inputs
1093 `(("bzip2" ,bzip2)
4732e6ee
BW
1094 ("zlib" ,zlib)
1095 ("pcre" ,pcre)
1096 ("perl" ,perl)
1097 ("python" ,python-wrapper)))
82c370de
RW
1098 (native-inputs
1099 `(("cpio" ,cpio)))
1100 (home-page "http://blast.ncbi.nlm.nih.gov")
1101 (synopsis "Basic local alignment search tool")
1102 (description
1103 "BLAST is a popular method of performing a DNA or protein sequence
1104similarity search, using heuristics to produce results quickly. It also
1105calculates an “expect value” that estimates how many matches would have
1106occurred at a given score by chance, which can aid a user in judging how much
1107confidence to have in an alignment.")
1108 ;; Most of the sources are in the public domain, with the following
1109 ;; exceptions:
1110 ;; * Expat:
1111 ;; * ./c++/include/util/bitset/
1112 ;; * ./c++/src/html/ncbi_menu*.js
1113 ;; * Boost license:
1114 ;; * ./c++/include/util/impl/floating_point_comparison.hpp
1115 ;; * LGPL 2+:
1116 ;; * ./c++/include/dbapi/driver/odbc/unix_odbc/
1117 ;; * ASL 2.0:
1118 ;; * ./c++/src/corelib/teamcity_*
1119 (license (list license:public-domain
1120 license:expat
1121 license:boost1.0
1122 license:lgpl2.0+
1123 license:asl2.0))))
1124
6c2b26e2
RW
;; BLESS 1p02, built with gnu-build-system (no configure script, no check
;; target).  The origin snippet deletes the bundled boost, pigz,
;; google-sparsehash and zlib trees; the ZLIB make flag points at libz.a from
;; the zlib input.  parse_args.cpp is patched so that the kmc binary is looked
;; up in this package's own bin directory (where the install phase puts it)
;; and pigz in the pigz input.
;; NOTE(review): the first sentence of the description reads ungrammatically
;; ("uses a single minimum-sized bloom filter is a correction tool"); consider
;; rewording it.
1125(define-public bless
1126 (package
1127 (name "bless")
1128 (version "1p02")
1129 (source (origin
1130 (method url-fetch)
1131 (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
1132 version ".tgz"))
1133 (sha256
1134 (base32
4d75e03a
RW
1135 "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
1136 (modules '((guix build utils)))
6c2b26e2
RW
1137 (snippet
1138 `(begin
1139 ;; Remove bundled boost, pigz, google-sparsehash, zlib, and .git directory
953c1223
RW
1140 ;; FIXME: also remove bundled sources for murmurhash3 and
1141 ;; kmc once packaged.
6c2b26e2
RW
1142 (delete-file-recursively "boost")
1143 (delete-file-recursively "pigz")
953c1223 1144 (delete-file-recursively "google-sparsehash")
6c2b26e2
RW
1145 (delete-file-recursively "zlib")
1146 (delete-file-recursively ".git")
1147 #t))))
1148 (build-system gnu-build-system)
1149 (arguments
1150 '(#:tests? #f ;no "check" target
1151 #:make-flags
1152 (list (string-append "ZLIB="
1153 (assoc-ref %build-inputs "zlib")
1154 "/lib/libz.a")
1155 (string-append "LDFLAGS="
1156 (string-join '("-lboost_filesystem"
1157 "-lboost_system"
1158 "-lboost_iostreams"
1159 "-lz"
1160 "-fopenmp"
1161 "-std=c++11"))))
1162 #:phases
1163 (modify-phases %standard-phases
;; The pigz sources were deleted by the origin snippet, so drop the
;; Makefile step that would try to build them.
1164 (add-after 'unpack 'do-not-build-bundled-pigz
1165 (lambda* (#:key inputs outputs #:allow-other-keys)
1166 (substitute* "Makefile"
1167 (("cd pigz/pigz-2.3.3; make") ""))
1168 #t))
1169 (add-after 'unpack 'patch-paths-to-executables
1170 (lambda* (#:key inputs outputs #:allow-other-keys)
1171 (substitute* "parse_args.cpp"
1172 (("kmc_binary = .*")
1173 (string-append "kmc_binary = \""
1174 (assoc-ref outputs "out")
1175 "/bin/kmc\";"))
1176 (("pigz_binary = .*")
1177 (string-append "pigz_binary = \""
1178 (assoc-ref inputs "pigz")
1179 "/bin/pigz\";")))
1180 #t))
;; Install the "bless" and bundled "kmc" executables into bin/.
1181 (replace 'install
1182 (lambda* (#:key outputs #:allow-other-keys)
1183 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
1184 (for-each (lambda (file)
1185 (install-file file bin))
1186 '("bless" "kmc/bin/kmc"))
1187 #t)))
1188 (delete 'configure))))
1189 (native-inputs
1190 `(("perl" ,perl)))
1191 (inputs
1192 `(("openmpi" ,openmpi)
1193 ("boost" ,boost)
953c1223 1194 ("sparsehash" ,sparsehash)
6c2b26e2
RW
1195 ("pigz" ,pigz)
1196 ("zlib" ,zlib)))
9641a899 1197 (supported-systems '("x86_64-linux"))
3b3b60d0 1198 (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
6c2b26e2
RW
1199 (synopsis "Bloom-filter-based error correction tool for NGS reads")
1200 (description
1201 "@dfn{Bloom-filter-based error correction solution for high-throughput
1202sequencing reads} (BLESS) uses a single minimum-sized bloom filter is a
1203correction tool for genomic reads produced by @dfn{Next-generation
1204sequencing} (NGS). BLESS produces accurate correction results with much less
1205memory compared with previous solutions and is also able to tolerate a higher
1206false-positive rate. BLESS can extend reads like DNA assemblers to correct
1207errors at the end of reads.")
1208 (license license:gpl3+)))
1209
2c7ee167
RW
;; Bowtie 2, version 2.3.2 (the package itself is named "bowtie"), built with
;; gnu-build-system from a GitHub release tarball.  The origin snippet pins
;; BUILD_HOST and BUILD_TIME in the Makefile; the configure phase is deleted
;; and the check phase runs scripts/test/simple_tests.pl against ./bowtie2
;; and ./bowtie2-build.  Restricted to x86_64-linux.
1210(define-public bowtie
1211 (package
1212 (name "bowtie")
d6e63cf3 1213 (version "2.3.2")
2c7ee167
RW
1214 (source (origin
1215 (method url-fetch)
1216 (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
1217 version ".tar.gz"))
f586c877 1218 (file-name (string-append name "-" version ".tar.gz"))
2c7ee167
RW
1219 (sha256
1220 (base32
d6e63cf3 1221 "0hwa5r9qbglppb7sz5z79rlmmddr3n51n468jb3wh8rwjgn3yr90"))
2c7ee167
RW
1222 (modules '((guix build utils)))
1223 (snippet
6cbee49d
MW
1224 '(begin
1225 (substitute* "Makefile"
1226 ;; replace BUILD_HOST and BUILD_TIME for deterministic build
1227 (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
1228 (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
1229 #t))))
2c7ee167 1230 (build-system gnu-build-system)
d6e63cf3
BW
1231 (inputs
1232 `(("perl" ,perl)
1233 ("perl-clone" ,perl-clone)
1234 ("perl-test-deep" ,perl-test-deep)
1235 ("perl-test-simple" ,perl-test-simple)
1236 ("python" ,python-2)
1237 ("tbb" ,tbb)
1238 ("zlib" ,zlib)))
2c7ee167 1239 (arguments
0047d26a
RW
1240 '(#:make-flags
1241 (list "allall"
1242 "WITH_TBB=1"
1243 (string-append "prefix=" (assoc-ref %outputs "out")))
2c7ee167 1244 #:phases
06e37236
BW
1245 (modify-phases %standard-phases
1246 (delete 'configure)
1247 (replace 'check
1248 (lambda* (#:key outputs #:allow-other-keys)
1249 (zero? (system* "perl"
1250 "scripts/test/simple_tests.pl"
1251 "--bowtie2=./bowtie2"
1252 "--bowtie2-build=./bowtie2-build")))))))
2c7ee167
RW
1253 (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
1254 (synopsis "Fast and sensitive nucleotide sequence read aligner")
1255 (description
1256 "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
1257reads to long reference sequences. It is particularly good at aligning reads
1258of about 50 up to 100s or 1,000s of characters, and particularly good at
1259aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
1260genome with an FM Index to keep its memory footprint small: for the human
1261genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
1262gapped, local, and paired-end alignment modes.")
241e1221 1263 (supported-systems '("x86_64-linux"))
2c7ee167
RW
1264 (license license:gpl3+)))
1265
94ce537e
RW
;; TopHat 2.1.0, built with gnu-build-system (parallel builds disabled).  The
;; origin snippet deletes the bundled SeqAn and samtools sources; the
;; use-system-samtools phase then rewrites src/Makefile.in and several source
;; files so that the samtools located by 'which' and the <samtools/...>
;; headers are used instead.
1266(define-public tophat
1267 (package
1268 (name "tophat")
1269 (version "2.1.0")
1270 (source (origin
1271 (method url-fetch)
1272 (uri (string-append
1273 "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
1274 version ".tar.gz"))
1275 (sha256
1276 (base32
1277 "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
fc1adab1 1278 (patches (search-patches "tophat-build-with-later-seqan.patch"))
94ce537e
RW
1279 (modules '((guix build utils)))
1280 (snippet
1281 '(begin
1282 ;; Remove bundled SeqAn and samtools
1283 (delete-file-recursively "src/SeqAn-1.3")
1284 (delete-file-recursively "src/samtools-0.1.18")
1285 #t))))
1286 (build-system gnu-build-system)
1287 (arguments
1288 '(#:parallel-build? #f ; not supported
1289 #:phases
1290 (modify-phases %standard-phases
1291 (add-after 'unpack 'use-system-samtools
1292 (lambda* (#:key inputs #:allow-other-keys)
;; Drop every Makefile reference to the bundled samtools 0.1.18
;; library and program.
1293 (substitute* "src/Makefile.in"
1294 (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
1295 (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
1296 (("SAMPROG = samtools_0\\.1\\.18") "")
1297 (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
1298 (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
;; Replace the bundled program name with the path returned by 'which'.
1299 (substitute* '("src/common.cpp"
1300 "src/tophat.py")
1301 (("samtools_0.1.18") (which "samtools")))
;; Include the samtools headers from the samtools/ directory.
1302 (substitute* '("src/common.h"
1303 "src/bam2fastx.cpp")
1304 (("#include \"bam.h\"") "#include <samtools/bam.h>")
1305 (("#include \"sam.h\"") "#include <samtools/sam.h>"))
1306 (substitute* '("src/bwt_map.h"
1307 "src/map2gtf.h"
1308 "src/align_status.h")
1309 (("#include <bam.h>") "#include <samtools/bam.h>")
1310 (("#include <sam.h>") "#include <samtools/sam.h>"))
1311 #t)))))
1312 (inputs
1313 `(("boost" ,boost)
1314 ("bowtie" ,bowtie)
1315 ("samtools" ,samtools-0.1)
1316 ("ncurses" ,ncurses)
1317 ("python" ,python-2)
1318 ("perl" ,perl)
1319 ("zlib" ,zlib)
1320 ("seqan" ,seqan)))
1321 (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
1322 (synopsis "Spliced read mapper for RNA-Seq data")
1323 (description
1324 "TopHat is a fast splice junction mapper for nucleotide sequence
1325reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
1326mammalian-sized genomes using the ultra high-throughput short read
1327aligner Bowtie, and then analyzes the mapping results to identify
1328splice junctions between exons.")
1329 ;; TopHat is released under the Boost Software License, Version 1.0
1330 ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
1331 (license license:boost1.0)))
1332
9a8336d8
RW
;; BWA 0.7.17, built with gnu-build-system from a GitHub release tarball.
;; There is no configure script and no check target, and the install phase is
;; replaced by one that copies the binary, README.md and the man page into
;; the output.  Restricted to x86_64-linux.
1333(define-public bwa
1334 (package
1335 (name "bwa")
6f141eff 1336 (version "0.7.17")
9a8336d8
RW
1337 (source (origin
1338 (method url-fetch)
ae6e00f6
BW
1339 (uri (string-append
1340 "https://github.com/lh3/bwa/releases/download/v"
1341 version "/bwa-" version ".tar.bz2"))
9a8336d8
RW
1342 (sha256
1343 (base32
6f141eff 1344 "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
9a8336d8
RW
1345 (build-system gnu-build-system)
1346 (arguments
1347 '(#:tests? #f ;no "check" target
1348 #:phases
dc1d3cde
KK
1349 (modify-phases %standard-phases
1350 (replace 'install
1351 (lambda* (#:key outputs #:allow-other-keys)
1352 (let ((bin (string-append
1353 (assoc-ref outputs "out") "/bin"))
1354 (doc (string-append
1355 (assoc-ref outputs "out") "/share/doc/bwa"))
1356 (man (string-append
1357 (assoc-ref outputs "out") "/share/man/man1")))
1358 (install-file "bwa" bin)
1359 (install-file "README.md" doc)
1360 (install-file "bwa.1" man))
1361 #t))
1362 ;; no "configure" script
1363 (delete 'configure))))
9a8336d8 1364 (inputs `(("zlib" ,zlib)))
db94f8c7
RW
1365 ;; Non-portable SSE instructions are used so building fails on platforms
1366 ;; other than x86_64.
1367 (supported-systems '("x86_64-linux"))
9a8336d8
RW
1368 (home-page "http://bio-bwa.sourceforge.net/")
1369 (synopsis "Burrows-Wheeler sequence aligner")
1370 (description
1371 "BWA is a software package for mapping low-divergent sequences against a
1372large reference genome, such as the human genome. It consists of three
1373algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
1374designed for Illumina sequence reads up to 100bp, while the rest two for
1375longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
1376features such as long-read support and split alignment, but BWA-MEM, which is
1377the latest, is generally recommended for high-quality queries as it is faster
1378and more accurate. BWA-MEM also has better performance than BWA-backtrack for
137970-100bp Illumina reads.")
1380 (license license:gpl3+)))
1381
d29150b5
RW
;; BWA-PSSM 0.5.11, defined as a variant of the bwa package: every field not
;; overridden below (notably the build arguments and supported-systems) is
;; inherited from bwa.  The source is a GitHub archive tarball and gdsl and
;; perl are added to the inputs.
1382(define-public bwa-pssm
1383 (package (inherit bwa)
1384 (name "bwa-pssm")
1385 (version "0.5.11")
1386 (source (origin
1387 (method url-fetch)
1388 (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
1389 "archive/" version ".tar.gz"))
1390 (file-name (string-append name "-" version ".tar.gz"))
1391 (sha256
1392 (base32
1393 "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
1394 (build-system gnu-build-system)
1395 (inputs
1396 `(("gdsl" ,gdsl)
1397 ("zlib" ,zlib)
1398 ("perl" ,perl)))
1399 (home-page "http://bwa-pssm.binf.ku.dk/")
1400 (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
1401 (description
1402 "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
1403the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
1404existing aligners it is fast and sensitive. Unlike most other aligners,
1405however, it is also adaptable in the sense that one can direct the alignment
1406based on known biases within the data set. It is coded as a modification of
1407the original BWA alignment program and shares the genome index structure as
1408well as many of the command line options.")
1409 (license license:gpl3+)))
1410
ad641d53
RW
;; bx-python 0.7.3 from PyPI, built for Python 2 only (#:python ,python-2).
;; The origin snippet strips the use of the "distribute_setup" module from
;; setup.py; tests are disabled because the test data are not included.
1411(define-public python2-bx-python
1412 (package
1413 (name "python2-bx-python")
c1dfe8c3 1414 (version "0.7.3")
ad641d53
RW
1415 (source (origin
1416 (method url-fetch)
c1dfe8c3 1417 (uri (pypi-uri "bx-python" version))
ad641d53
RW
1418 (sha256
1419 (base32
c1dfe8c3 1420 "15z2w3bvnc0n4qmb9bd6d8ylc2h2nj883x2w9iixf4x3vki9b22i"))
ad641d53
RW
1421 (modules '((guix build utils)))
1422 (snippet
6cbee49d
MW
1423 '(begin
1424 (substitute* "setup.py"
1425 ;; remove dependency on outdated "distribute" module
1426 (("^from distribute_setup import use_setuptools") "")
1427 (("^use_setuptools\\(\\)") ""))
1428 #t))))
ad641d53
RW
1429 (build-system python-build-system)
1430 (arguments
1431 `(#:tests? #f ;tests fail because test data are not included
1432 #:python ,python-2))
1433 (inputs
1434 `(("python-numpy" ,python2-numpy)
1435 ("zlib" ,zlib)))
1436 (native-inputs
f3b98f4f 1437 `(("python-nose" ,python2-nose)))
ad641d53
RW
1438 (home-page "http://bitbucket.org/james_taylor/bx-python/")
1439 (synopsis "Tools for manipulating biological data")
1440 (description
1441 "bx-python provides tools for manipulating biological data, particularly
1442multiple sequence alignments.")
1443 (license license:expat)))
1444
55a9a8c2
RW
;; pysam 0.13.0, fetched from GitHub because the PyPI tarball lacks the test
;; data.  The bundled htslib is deleted in the origin snippet and the build
;; is switched to the htslib input through the HTSLIB_* environment
;; variables.  The replacement check phase only builds the test data; the
;; nosetests invocation itself is commented out (see the FIXME below).
1445(define-public python-pysam
1446 (package
1447 (name "python-pysam")
68565184 1448 (version "0.13.0")
d454640c
RW
1449 (source (origin
1450 (method url-fetch)
f536dce5
MB
1451 ;; Test data is missing on PyPi.
1452 (uri (string-append
1453 "https://github.com/pysam-developers/pysam/archive/v"
1454 version ".tar.gz"))
1455 (file-name (string-append name "-" version ".tar.gz"))
d454640c
RW
1456 (sha256
1457 (base32
68565184 1458 "0dzap2axin9cbbl0d825w294bpn00zagfm1sigamm4v2pm5bj9lp"))
dff26b23 1459 (modules '((guix build utils)))
6cbee49d
MW
1460 (snippet '(begin
1461 ;; Drop bundled htslib. TODO: Also remove samtools
1462 ;; and bcftools.
1463 (delete-file-recursively "htslib")
1464 #t))))
55a9a8c2
RW
1465 (build-system python-build-system)
1466 (arguments
71dbf592
RW
1467 `(#:modules ((ice-9 ftw)
1468 (srfi srfi-26)
1469 (guix build python-build-system)
1470 (guix build utils))
1471 #:phases
397d463a
MB
1472 (modify-phases %standard-phases
1473 (add-before 'build 'set-flags
dff26b23
MB
1474 (lambda* (#:key inputs #:allow-other-keys)
1475 (setenv "HTSLIB_MODE" "external")
1476 (setenv "HTSLIB_LIBRARY_DIR"
1477 (string-append (assoc-ref inputs "htslib") "/lib"))
1478 (setenv "HTSLIB_INCLUDE_DIR"
1479 (string-append (assoc-ref inputs "htslib") "/include"))
397d463a
MB
1480 (setenv "LDFLAGS" "-lncurses")
1481 (setenv "CFLAGS" "-D_CURSES_LIB=1")
f536dce5 1482 #t))
71dbf592 1483 (replace 'check
f536dce5 1484 (lambda* (#:key inputs outputs #:allow-other-keys)
71dbf592 1485 ;; Add first subdirectory of "build" directory to PYTHONPATH.
f536dce5
MB
1486 (setenv "PYTHONPATH"
1487 (string-append
1488 (getenv "PYTHONPATH")
71dbf592
RW
1489 ":" (getcwd) "/build/"
1490 (car (scandir "build"
e1f02f92 1491 (negate (cut string-prefix? "." <>))))))
f536dce5 1492 ;; Step out of source dir so python does not import from CWD.
71dbf592
RW
1493 (with-directory-excursion "tests"
1494 (setenv "HOME" "/tmp")
1495 (and (zero? (system* "make" "-C" "pysam_data"))
1496 (zero? (system* "make" "-C" "cbcf_data"))
b2955e22
RW
1497 ;; Running nosetests without explicitly asking for a
1498 ;; single process leads to a crash. Running with multiple
1499 ;; processes fails because the tests are not designed to
1500 ;; run in parallel.
31c374e0
RW
1501
1502 ;; FIXME: tests keep timing out on some systems.
1503 ;; (zero? (system* "nosetests" "-v"
1504 ;; "--processes" "1"))
1505 )))))))
dff26b23
MB
1506 (propagated-inputs
1507 `(("htslib" ,htslib))) ; Included from installed header files.
55a9a8c2 1508 (inputs
649e9b3b 1509 `(("ncurses" ,ncurses)
55a9a8c2 1510 ("zlib" ,zlib)))
649e9b3b
RW
1511 (native-inputs
1512 `(("python-cython" ,python-cython)
f536dce5
MB
1513 ;; Dependencies below are for tests only.
1514 ("samtools" ,samtools)
1515 ("bcftools" ,bcftools)
1516 ("python-nose" ,python-nose)))
55a9a8c2
RW
1517 (home-page "https://github.com/pysam-developers/pysam")
1518 (synopsis "Python bindings to the SAMtools C API")
1519 (description
1520 "Pysam is a Python module for reading and manipulating files in the
1521SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
1522also includes an interface for tabix.")
1523 (license license:expat)))
1524
;; Python 2 variant of python-pysam, derived with package-with-python2.
1525(define-public python2-pysam
1526 (package-with-python2 python-pysam))
1527
4db9433a
RW
;; twobitreader 3.1.4 from PyPI, built with python-build-system.  Tests are
;; disabled because they are not part of the PyPI release.
1528(define-public python-twobitreader
1529 (package
1530 (name "python-twobitreader")
044ac8d2 1531 (version "3.1.4")
4db9433a
RW
1532 (source (origin
1533 (method url-fetch)
1534 (uri (pypi-uri "twobitreader" version))
1535 (sha256
1536 (base32
044ac8d2 1537 "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
4db9433a 1538 (build-system python-build-system)
900fb8d0
LF
1539 (arguments
1540 '(;; Tests are not distributed in the PyPi release.
1541 ;; TODO Try building from the Git repo or asking the upstream maintainer
1542 ;; to distribute the tests on PyPi.
1543 #:tests? #f))
4db9433a
RW
1544 (native-inputs
1545 `(("python-sphinx" ,python-sphinx)))
1546 (home-page "https://github.com/benjschiller/twobitreader")
1547 (synopsis "Python library for reading .2bit files")
1548 (description
1549 "twobitreader is a Python library for reading .2bit files as used by the
1550UCSC genome browser.")
1551 (license license:artistic2.0)))
1552
;; Python 2 variant of python-twobitreader, derived with package-with-python2.
1553(define-public python2-twobitreader
5c31f4aa 1554 (package-with-python2 python-twobitreader))
4db9433a 1555
f94bf198
RW
;; plastid 0.4.8 from PyPI, built with python-build-system.  Tests are
;; disabled because some test files are not included.  All of its Python
;; library dependencies are propagated inputs; cython and nose are
;; build-time-only inputs.
1556(define-public python-plastid
1557 (package
1558 (name "python-plastid")
897ab082 1559 (version "0.4.8")
f94bf198
RW
1560 (source (origin
1561 (method url-fetch)
1562 (uri (pypi-uri "plastid" version))
1563 (sha256
1564 (base32
897ab082 1565 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
f94bf198
RW
1566 (build-system python-build-system)
1567 (arguments
1568 ;; Some test files are not included.
1569 `(#:tests? #f))
1570 (propagated-inputs
1571 `(("python-numpy" ,python-numpy)
1572 ("python-scipy" ,python-scipy)
1573 ("python-pandas" ,python-pandas)
1574 ("python-pysam" ,python-pysam)
1575 ("python-matplotlib" ,python-matplotlib)
1576 ("python-biopython" ,python-biopython)
99caa6f7
BW
1577 ("python-twobitreader" ,python-twobitreader)
1578 ("python-termcolor" ,python-termcolor)))
f94bf198
RW
1579 (native-inputs
1580 `(("python-cython" ,python-cython)
1581 ("python-nose" ,python-nose)))
1582 (home-page "https://github.com/joshuagryphon/plastid")
1583 (synopsis "Python library for genomic analysis")
1584 (description
1585 "plastid is a Python library for genomic analysis – in particular,
1586high-throughput sequencing data – with an emphasis on simplicity.")
1587 (license license:bsd-3)))
1588
;; Python 2 variant of python-plastid, derived with package-with-python2.
1589(define-public python2-plastid
5c31f4aa 1590 (package-with-python2 python-plastid))
f94bf198 1591
6c1305f9
RW
;; CD-HIT 4.6.8, built with gnu-build-system from a GitHub release tarball.
;; There is no configure script and no test suite.  PREFIX is set to the
;; output's bin directory, which an extra phase creates before installation;
;; the be-timeless phase removes the __DATE__ and __TIME__ references from
;; the sources.
1592(define-public cd-hit
1593 (package
1594 (name "cd-hit")
ba773f65 1595 (version "4.6.8")
6c1305f9
RW
1596 (source (origin
1597 (method url-fetch)
1598 (uri (string-append "https://github.com/weizhongli/cdhit"
1599 "/releases/download/V" version
ba773f65
BW
1600 "/cd-hit-v" version
1601 "-2017-0621-source.tar.gz"))
6c1305f9
RW
1602 (sha256
1603 (base32
d4735e8c 1604 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
6c1305f9
RW
1605 (build-system gnu-build-system)
1606 (arguments
1607 `(#:tests? #f ; there are no tests
1608 #:make-flags
1609 ;; Executables are copied directly to the PREFIX.
1610 (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
1611 #:phases
1612 (modify-phases %standard-phases
1613 ;; No "configure" script
1614 (delete 'configure)
1615 ;; Remove sources of non-determinism
1616 (add-after 'unpack 'be-timeless
1617 (lambda _
1618 (substitute* "cdhit-utility.c++"
1619 ((" \\(built on \" __DATE__ \"\\)") ""))
1620 (substitute* "cdhit-common.c++"
1621 (("__DATE__") "\"0\"")
1622 (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
1623 #t))
ba773f65 1624 ;; The "install" target does not create the target directory.
6c1305f9
RW
1625 (add-before 'install 'create-target-dir
1626 (lambda* (#:key outputs #:allow-other-keys)
1627 (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
1628 #t)))))
1629 (inputs
1630 `(("perl" ,perl)))
1631 (home-page "http://weizhongli-lab.org/cd-hit/")
1632 (synopsis "Cluster and compare protein or nucleotide sequences")
1633 (description
1634 "CD-HIT is a program for clustering and comparing protein or nucleotide
1635sequences. CD-HIT is designed to be fast and handle extremely large
1636databases.")
1637 ;; The manual says: "It can be copied under the GNU General Public License
1638 ;; version 2 (GPLv2)."
1639 (license license:gpl2)))
810cff85
RW
;; CLIPper only supports Python 2, so every Python dependency below is the
;; python2- variant.
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove unnecessary setup dependency
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           ;; Drop the pre-built shared objects and the bundled "dist"
           ;; directory shipped in the tarball.
           (delete-file "clipper/src/peaks.so")
           (delete-file "clipper/src/readsToWiggle.so")
           (delete-file-recursively "dist/")
           #t))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (inputs
     `(("htseq" ,python2-htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    ;; All native inputs are needed for the test suite only.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
1686
6a35566d
RS
;; CodingQuarry has no configure script, no "check" target and no usable
;; "install" target, so installation is done by hand.  The "QuarryFiles"
;; directory is installed at the top of the output and advertised through the
;; QUARRY_PATH search path.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32
         "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Return #t explicitly, like the other phases in this file,
               ;; instead of the unspecified value of 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
1726
36742f43
RW
;; COUGER is distributed as a zip of Python sources: nothing is compiled, the
;; "src" tree is copied into the output and the launcher script is patched and
;; wrapped so that it finds its modules.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix-dir (assoc-ref outputs "out"))
                    (bin-dir (string-append prefix-dir "/bin")))
               (copy-recursively "src" (string-append prefix-dir "/src"))
               (mkdir bin-dir)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix-dir "\")\nfrom argparse")))
               (install-file "couger" bin-dir)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((launcher (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                   (path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 `("PYTHONPATH" ":" prefix (,path)))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    ;; The source archive is a zip file.
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
1797
bfe3c685
RW
;; Clustal-Omega uses the default gnu-build-system phases; its only declared
;; input is argtable.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     (list (list "argtable" argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
1820
191c7101
RW
;; CrossMap bundles its own pysam; the bundled copy is deleted and the build
;; is told, via an environment variable honoured by the patched sources, to
;; use the system pysam instead.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
       ;; This patch has been sent upstream already and is available
       ;; for download from Sourceforge, but it has not been merged.
       (patches (search-patches "crossmap-allow-system-pysam.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             #t)))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
1861
8e913213
RW
;; Cutadapt is fetched from the upstream Git repository at the tag "v" followed
;; by the version.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.16")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marcelm/cutadapt.git")
             (commit (string-append "v" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "09pr02067jiks19nc0aby4xp70hhgvb554i2y1c04rv1m401w7q8"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    (inputs
     `(("python-xopen" ,python-xopen)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1887
1baee943
RW
;; libBigWig has a hand-written Makefile: there is no configure script, and the
;; compiler and installation prefix are passed as make flags.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "0h2smg24v5srdcqzrmz2g23cmlp4va465mgx8r2z571sfz8pv454"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f                      ; tests require access to the web
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Need for tests
       ("python" ,python-2)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
1924
69e0e03c
RW
;; pyBigWig bundles a copy of libBigWig; the bundled sources are removed and
;; setup.py is patched to link against the packaged library instead.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.12")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "00w4kfnm2c5l7wdwr2nj1z5djv8kzgf7h1zhsgv6njff1rwr26g0"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Prepend "BigWig" to the list of libraries to link with.
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
1962
;; Export python2-pybigwig as the result of applying package-with-python2 to
;; python-pybigwig.
1963(define-public python2-pybigwig
5c31f4aa 1964 (package-with-python2 python-pybigwig))
69e0e03c 1965
ec2a67de
BW
;; DendroPy is fetched from the GitHub archive rather than PyPI so that the
;; test suite is part of the source.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source
     (origin
       (method url-fetch)
       ;; Source from GitHub so that tests are included.
       (uri (string-append
             "https://github.com/jeetsukumaran/DendroPy/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0v2fccny5xjaah546bsch1mw4kh61qq5frz2ibllxs9mp6ih9bsn"))))
    (build-system python-build-system)
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
ec2a67de
BW
1989
;; Export python2-dendropy as the result of applying package-with-python2 to
;; python-dendropy.
1990(define-public python2-dendropy
9e0c135c 1991 (package-with-python2 python-dendropy))
ec2a67de 1992
eb2200f3
RW
;; py2bit is taken from PyPI and built with the stock python-build-system
;; phases.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "py2bit" version))
              (sha256
               (base32
                "1cdf4qlmgwsh1f4k0wdv2sr8x9qn4366p0k3614vbd0fpqiarxrl"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
ec2a67de 2011
1921b1de
RW
;; deepTools is built from the GitHub release archive with the default
;; python-build-system phases.
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/deeptools/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1q8i12l2gvk4n2s8lhyzwhh9g4qbc8lrk5l7maz00yvd5g6z5540"))))
    (build-system python-build-system)
    ;; All native inputs are needed for the test suite only.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-pytz" ,python-pytz)))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)))
    (home-page "https://github.com/deeptools/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2049
fdc30687
RJ
;; Delly has no configure script, no tests and no "install" target.  The
;; three executables built under "src" and the "excludeTemplates" directory
;; are installed by hand.
(define-public delly
  (package
    (name "delly")
    (version "0.7.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/tobiasrausch/delly/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0dkwy3pyxmi6dhh1lpsr3698ri5sslw9qz67hfys0bz8dgrqwabj"))
       (patches (search-patches "delly-use-system-libraries.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags '("PARALLEL=1") ; Allow parallel execution at run-time.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           ;; Take the output directory from the OUTPUTS keyword, as the
           ;; other phases in this file do, rather than from the global
           ;; %outputs.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (templates (string-append out "/share/delly/templates")))
               (mkdir-p bin)
               (mkdir-p templates)
               (copy-recursively "excludeTemplates" templates)
               (install-file "src/cov" bin)
               (install-file "src/delly" bin)
               (install-file "src/dpe" bin)
               ;; Return #t explicitly instead of the unspecified value of
               ;; 'install-file'.
               #t))))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (home-page "https://github.com/tobiasrausch/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2096
684bf7c7
BW
;; DIAMOND is built with CMake.  The "-march=native" flag is stripped from
;; CMakeLists.txt before configuring.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.22")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0adp87r9ak63frdrdmrdfhsn6g0jnnyq1lr2wibvqbxcl37iir9m"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
684bf7c7 2130
97b9da68
RW
;; Discrover is built with CMake.  Two headers are patched after unpacking to
;; add an "#include <random>" line right after their include guards.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maaskola/discrover/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'add-missing-includes
           (lambda _
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t)))))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
2169
6619f9c7
RW
;; EIGENSOFT is built from a pinned Git commit.  The checkout's "bin"
;; directory is replaced with an empty one before building, the Makefile under
;; "src" is used directly, and the executables that "make install" puts in the
;; build root's "bin" directory are then copied into the output.
(define-public eigensoft
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-"
                              revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32
           "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Remove pre-built binaries.
         (snippet
          '(begin
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:make-flags '("CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((destination (string-append (assoc-ref outputs "out")
                                                 "/bin")))
                 (for-each (lambda (executable)
                             (install-file executable destination))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
2237
365c8153
RW
;; EDirect: only the "edirect.pl" script is installed.  Nothing is configured
;; or built; the script is copied to "bin" and wrapped so that it finds its
;; Perl dependencies through PERL5LIB at run time.
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                           "versions/2016-05-03/edirect.tar.gz"))
       (sha256
        (base32
         "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path)))
               ;; Return #t explicitly, like the other phases in this file,
               ;; rather than whatever 'wrap-program' returns.
               #t))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
2302
b16728b0
BW
;; Exonerate uses the default gnu-build-system phases, with parallel builds
;; disabled because they fail on some machines.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; Grammar fix: the description used to read "using a many alignment
    ;; models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2332
e4e5a4d8
RW
;; eXpress is built with CMake.  Its CMake files are patched to use shared
;; Boost and protobuf libraries and to point at the packaged bamtools headers
;; and libraries instead of a "bamtools" directory in the source tree.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools-dir (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools-dir "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools-dir "/lib"))
                 (("libprotobuf.a") "libprotobuf.so"))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
2377
f3674b1c
BW
;; Express Beta Diversity: the Makefile lives in the "source" sub-directory,
;; so the build and check phases run from there, and installation (done by
;; hand) runs from the top-level directory again.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; The check phase runs the freshly built executable with "-u".
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/bin")))
               (mkdir-p destination)
               (install-file "scripts/convertToEBD.py" destination)
               (install-file "bin/ExpressBetaDiversity" destination)
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
2420
12b04cbe
BW
;; FastTree is distributed as a single C file, so there is nothing to unpack
;; or configure.  The file is compiled twice: once as the serial "FastTree"
;; and once with OpenMP enabled as "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32
         "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; COMPILE runs gcc on SOURCE, producing OUTPUT.  EXTRA-FLAGS
             ;; are placed before the flags shared by both variants, so the
             ;; resulting command lines are the same as when each invocation
             ;; was spelled out in full.
             (let ((compile
                    (lambda (output . extra-flags)
                      (zero? (apply system* "gcc"
                                    (append extra-flags
                                            (list "-O3"
                                                  "-finline-functions"
                                                  "-funroll-loops"
                                                  "-Wall"
                                                  "-o" output
                                                  source
                                                  "-lm")))))))
               (and (compile "FastTree")
                    (compile "FastTreeMP" "-DOPENMP" "-fopenmp")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    ;; Grammar fix: the description used to read "a million of sequences".
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
2477
2127cedb
RW
;; The FASTX-Toolkit is built from the upstream release tarball with the
;; default gnu-build-system phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "pkg-config" pkg-config)))
    (inputs
     (list (list "libgtextutils" libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
2509
d7678942
RW
;; Flexbar: CMake-based build.  The 'install phase is deleted and the
;; binary directory is pointed at the output via FLEXBAR_BINARY_DIR, so the
;; build is presumably what places the executable in "out"/bin; the
;; replacement 'check phase relies on finding it there.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/flexbar/"
                       version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32
         "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the freshly built program on PATH for the test script.
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The unpacked source directory is named after the version;
             ;; derive it from VERSION instead of hard-coding "2.5" so that
             ;; a version bump does not silently break this phase.
             (chdir (string-append "../flexbar_v" ,version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
2554
19f4554c
BW
;; FragGeneScan: there is no configure script and no "make check".  The
;; sources are patched so that helper programs are called by absolute file
;; name and the training data is looked up under share/fraggenescan/train;
;; a post-install phase then runs two of the bundled examples as a test.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Note the trailing slash: path components appended
                    ;; below must not start with another "/".
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  ;; SHARE already ends in "/", so append "train/" directly
                  ;; rather than "/train/" to avoid a doubled slash.
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _ (and (zero? (system* "make" "clean"))
                          (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share)
               ;; Return an explicit success value instead of relying on
               ;; whatever 'copy-recursively' happens to return.
               #t)))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run two of the examples and check that
           ;; the output files of the first one get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               (and (zero? (system* frag ; Test complete genome.
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out")
                    (zero? (system* ; Test incomplete sequences.
                            frag
                            "-genome=./example/NC_000913-fgs.ffn"
                            "-out=out"
                            "-complete=0"
                            "-train=454_30")))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
2638
81f3e0c1
BW
;; fxtract needs the sources of ctSkennerton's "util" repository in its
;; "util" directory at build time.  That repository is fetched separately
;; (pinned to UTIL-COMMIT) and copied into place before the build.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the download after this package.  It used to carry the
         ;; name of the util checkout, which was a copy-and-paste mistake:
         ;; this origin is the fxtract release tarball itself.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Replace the "util" directory of the tarball (removed with
               ;; 'rmdir', so it is expected to be empty) with the checkout.
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
2705
2b18ad05
PP
;; GEMMA: plain Makefile build (no configure script, no test suite).  The
;; make flags are chosen at package-evaluation time from the current system.
(define-public gemma
  (package
    (name "gemma")
    (version "0.96")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/xiangzhou/GEMMA/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "055ynn16gd12pf78n4vr2a9jlwsbwzajpdnf2y2yilg1krfff222"))
       (patches (search-patches "gemma-intel-compat.patch"))))
    (build-system gnu-build-system)
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no tests included yet
       #:make-flags
       ;; FORCE_DYNAMIC=1 is passed on every system; the remaining flag, if
       ;; any, depends on the architecture.
       '("FORCE_DYNAMIC=1"
         ,@(match (%current-system)
             ("x86_64-linux" '())
             ("i686-linux" '("FORCE_32BIT=1"))
             (_ '("NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the "bin" directory ahead of the build.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "bin/gemma" bin))
             #t)))))
    (home-page "https://github.com/xiangzhou/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (license license:gpl3)))
2755
5854f685
RW
;; GRIT: a Python 2 package.  The pre-generated C files are deleted so that
;; they are produced afresh from the Cython sources, and setup.py is
;; patched to find the numpy headers.
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/nboley/grit/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Delete these C files to force fresh generation from pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (let ((numpy (assoc-ref inputs "python-numpy")))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Add numpy include path to fix compilation
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy
                                 "/lib/python2.7/site-packages/numpy/core/include/"
                                 "']"))))
             #t)))))
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
2805
346a829a
RW
;; HISAT: Makefile-only build from a zip archive.  The Makefile is patched
;; to use gcc/g++ and fixed BUILD_HOST/BUILD_TIME values, and the programs
;; are installed by hand.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install every file whose name matches the hisat program
             ;; name pattern, wherever it is in the build tree.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs
                    (find-files
                     "."
                     "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bin))
                         programs))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
2872
e84efc50
RW
;; HISAT2: Makefile-only build from a GitHub snapshot.  The programs and
;; the HTML manual are installed by hand.
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       ;; FIXME: a better source URL is
       ;; (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
       ;; "/downloads/hisat2-" version "-source.zip")
       ;; with hash "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"
       ;; but it is currently unavailable.
       (uri "https://github.com/infphilo/hisat2/archive/cba6e8cb.tar.gz")
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1mf2hdsyv7cd97xm9mp9a4qws02yrj95y6w6f6cdwnq0klp81r50"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       ;; (srfi srfi-26) provides 'cut', used by the 'install phase.
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Replace the `date` invocation in the Makefile with "0".
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (doc (string-append out "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bin) programs)
               (mkdir-p doc)
               (install-file "doc/manual.inc.html" doc))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (home-page "http://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later. The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
2935
c684629f
BW
;; HMMER: standard GNU build of the release tarball with one Guix patch
;; applied ("hmmer-remove-cpu-specificity.patch").
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; Releases are stored in a directory named after the major version
       ;; ("hmmer" followed by the result of 'version-major').
       (uri (string-append
             "http://eddylab.org/software/hmmer"
             (version-major version) "/"
             version "/hmmer-" version ".tar.gz"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))
       (patches (search-patches "hmmer-remove-cpu-specificity.patch"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
2966
85652f59
RW
;; HTSeq: Python package fetched from PyPI; Cython is needed at build time.
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32
         "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
2992
92971d68
BW
;; Python 2 variant of htseq, derived with 'package-with-python2'.
(define-public python2-htseq (package-with-python2 htseq))
2995
;; HTSJDK 2.3.0, built with ant using the project's own build.xml.  The
;; bundled "lib" directory is emptied by a source snippet.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0")          ; last version without build dependency on gradle
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htsjdk/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ibhzzxsfc38nqyk9r8zqj6blfc1kh26iirypd4q6n90hs2m6nyq"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete pre-built binaries, leaving an empty "lib" directory
        ;; in their place.
        '(begin
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:tests? #f                      ; tests require Internet access
       #:build-target "all"
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also an number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3045
9a599c17
RW
;; A newer HTSJDK, fetched from git at the tag named after the version.
;; The project's build.xml is deleted and a jar name is passed to the
;; ant-build-system instead; the test sources are removed.
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:tests? #f                      ; tests require Scala
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also an number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3088
719fa958
RW
3089;; This is needed for picard 2.10.3
;; HTSJDK pinned to 2.10.1.  Inputs and metadata are inherited from
;; java-htsjdk-latest; the source and build arguments are spelled out here.
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:tests? #f                      ; tests require Scala
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
3115
d7fed31a
RW
3116;; This version matches java-htsjdk 2.3.0. Later versions also require a more
3117;; recent version of java-htsjdk, which depends on gradle.
;; Picard 2.3.0, built with ant from the project's build.xml.  build.xml is
;; patched (in the source snippet and again in build phases) so that it
;; uses the packaged java-htsjdk instead of compiling its own copy.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32
         "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" line)
              (string-append line "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk (assoc-ref inputs "java-htsjdk")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib")
                  (string-append htsjdk "/share/java/htsjdk/"))))
             #t))
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             ;; Drop "compile" from the dependencies of the "test" target.
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3195
72299db9
RW
3196;; This is needed for dropseq-tools
;; Picard 2.10.3, built with the ant-build-system into "picard.jar" with
;; picard.cmdline.PicardCommandLine as the main class.  The tests are
;; disabled and java-htsjdk-2.10.1 is propagated.  The build.xml that exists
;; at build time is rewritten through SXML so that the jar manifest gains a
;; Class-Path entry pointing at htsjdk.jar in the store.
3197(define-public java-picard-2.10.3
3198 (package
3199 (name "java-picard")
3200 (version "2.10.3")
3201 (source (origin
3202 (method git-fetch)
3203 (uri (git-reference
3204 (url "https://github.com/broadinstitute/picard.git")
3205 (commit version)))
3206 (file-name (string-append "java-picard-" version "-checkout"))
3207 (sha256
3208 (base32
3209 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
3210 (build-system ant-build-system)
3211 (arguments
3212 `(#:jar-name "picard.jar"
3213 ;; Tests require jacoco:coverage.
3214 #:tests? #f
3215 #:jdk ,icedtea-8
3216 #:main-class "picard.cmdline.PicardCommandLine"
;; The (sxml ...) modules provide xml->sxml, sxml->xml, pre-post-order and
;; sxpath, which the 'edit-classpath-in-manifest phase below uses.
3217 #:modules ((guix build ant-build-system)
3218 (guix build utils)
3219 (guix build java-utils)
3220 (sxml simple)
3221 (sxml transform)
3222 (sxml xpath))
3223 #:phases
3224 (modify-phases %standard-phases
e847f402
RW
3225 ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
3226 (delete 'generate-jar-indices)
72299db9
RW
;; Delete the upstream build.xml right after unpacking.
;; NOTE(review): a later phase reads "build.xml" again, so one is presumably
;; regenerated by the ant-build-system because #:jar-name is given -- confirm.
3227 (add-after 'unpack 'remove-useless-build.xml
3228 (lambda _ (delete-file "build.xml") #t))
3229 ;; This is necessary to ensure that htsjdk is found when using
3230 ;; picard.jar as an executable.
3231 (add-before 'build 'edit-classpath-in-manifest
3232 (lambda* (#:key inputs #:allow-other-keys)
;; Set the mode of build.xml to #o664; presumably it is read-only otherwise.
3233 (chmod "build.xml" #o664)
;; Parse build.xml to SXML, transform it, and write the result to
;; "build.xml.new", which replaces build.xml at the end of the phase.
3234 (call-with-output-file "build.xml.new"
3235 (lambda (port)
3236 (sxml->xml
3237 (pre-post-order
3238 (with-input-from-file "build.xml"
3239 (lambda _ (xml->sxml #:trim-whitespace? #t)))
;; Handler for <target> elements: NAME is the list of text values found by
;; the sxpath '(name *text*) in the first child (presumably the element's
;; attribute list -- confirm).
3240 `((target . ,(lambda (tag . kids)
3241 (let ((name ((sxpath '(name *text*))
3242 (car kids)))
3243 ;; FIXME: We're breaking the line
3244 ;; early with a dummy path to
3245 ;; ensure that the store reference
3246 ;; isn't broken apart and can still
3247 ;; be found by the reference
3248 ;; scanner.
3249 (msg (format #f
3250 "\
3251Class-Path: /~a \
d60772dc 3252 ~a/share/java/htsjdk.jar${line.separator}${line.separator}"
72299db9
RW
3253 ;; maximum line length is 70
;; The first ~a receives a string of 57 "b" characters (the dummy path), the
;; second the store file name of the "java-htsjdk" input.
3254 (string-tabulate (const #\b) 57)
3255 (assoc-ref inputs "java-htsjdk"))))
;; Only a target whose name is "manifest" gets two extra child tasks: a
;; replaceregexp over ${manifest.file} and an echo that appends MSG to it.
;; Every other target is rebuilt unchanged.
3256 (if (member "manifest" name)
3257 `(,tag ,@kids
d60772dc
GB
3258 (replaceregexp
3259 (@ (file "${manifest.file}")
3260 (match "\\r\\n\\r\\n")
3261 (replace "${line.separator}")))
72299db9
RW
3262 (echo
3263 (@ (message ,msg)
3264 (file "${manifest.file}")
3265 (append "true"))))
3266 `(,tag ,@kids)))))
;; All other elements and all text nodes are passed through as they are.
3267 (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
3268 (*text* . ,(lambda (_ txt) txt))))
3269 port)))
3270 (rename-file "build.xml.new" "build.xml")
3271 #t)))))
3272 (propagated-inputs
3273 `(("java-htsjdk" ,java-htsjdk-2.10.1)))
3274 (native-inputs
3275 `(("java-testng" ,java-testng)
3276 ("java-guava" ,java-guava)))
3277 (home-page "http://broadinstitute.github.io/picard/")
3278 (synopsis "Tools for manipulating high-throughput sequencing data and formats")
3279 (description "Picard is a set of Java command line tools for manipulating
3280high-throughput sequencing (HTS) data and formats. Picard is implemented
3281using the HTSJDK Java library to support accessing file formats that are
3282commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
3283VCF.")
3284 (license license:expat)))
3285
d7f24778
RW
;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  (package (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/picard.git")
                    (commit version)))
              (file-name (string-append "java-picard-" version "-checkout"))
              (sha256
               (base32
                "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete pre-built binaries.
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       #:jdk ,icedtea-8
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
                (string-append (assoc-ref inputs "ant")
                               "/lib/ant.jar")))
             #t))
         ;; Decouple the "test" target from "compile" in build.xml so that
         ;; "compile" only depends on "compile-src".
         (add-after 'unpack 'make-test-target-independent
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Replace the null default for "intel_deflater_so_path" with the
         ;; location where 'install-jni-lib' puts the library.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (substitute* "src/java/net/sf/samtools/Defaults.java"
               (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                               (assoc-ref outputs "out")
                               "/lib/jni/libIntelDeflater.so"
                               "\")")))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one.  This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "lib/jni")
             (mkdir-p "jdk-src")
             ;; Every step must succeed: unpack the JDK sources, generate
             ;; the JNI header, compile, and finally link.
             (and (zero? (system* "tar" "--strip-components=1" "-C" "jdk-src"
                                  "-xf" (assoc-ref inputs "jdk-src")))
                  (zero? (system* "javah" "-jni"
                                  "-classpath" "classes"
                                  "-d" "lib/"
                                  "net.sf.samtools.util.zip.IntelDeflater"))
                  (with-directory-excursion "src/c/inteldeflater"
                    ;; The body of 'with-directory-excursion' is a plain
                    ;; sequence, so the two gcc invocations have to be
                    ;; joined with 'and'; otherwise a failed compilation
                    ;; would be ignored and only the link status returned.
                    (and (zero? (system* "gcc" "-I../../../lib" "-I."
                                         (string-append "-I" (assoc-ref inputs "jdk")
                                                        "/include/linux")
                                         "-I../../../jdk-src/src/share/native/common/"
                                         "-I../../../jdk-src/src/solaris/native/common/"
                                         "-c" "-O3" "-fPIC" "IntelDeflater.c"))
                         (zero? (system* "gcc" "-shared"
                                         "-o" "../../../lib/jni/libIntelDeflater.so"
                                         "IntelDeflater.o" "-lz" "-lstdc++")))))))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (zero? (apply system* `("ant" "all" ,@make-flags)))))
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni (string-append (assoc-ref outputs "out")
                                       "/lib/jni")))
               (mkdir-p jni)
               (install-file "lib/jni/libIntelDeflater.so" jni)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant) ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
3401
bd975831
RW
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Point the jar names referenced in build.xml at the jars provided
         ;; by our inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (string-append (assoc-ref inputs "java-jbzip2")
                               "/share/java/jbzip2.jar"))
               (("sam-1.103.jar")
                (string-append (assoc-ref inputs "java-picard-1.113")
                               "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (string-append (assoc-ref inputs "java-cisd-jhdf5")
                               "/share/java/sis-jhdf5.jar")))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; No trailing slash here: "/fastqc" is appended below
                    ;; and the result is used as the symlink target, so a
                    ;; trailing slash would leave a "//" in that path.
                    (share (string-append out "/share/fastqc"))
                    (exe (string-append share "/fastqc")))
               (for-each mkdir-p (list bin share))
               (copy-recursively "bin" share)
               ;; Make the wrapper script use the absolute path of java.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl) ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
 be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
 without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
3480
a1b80245
RW
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(;; There is no test suite.
       #:tests? #f
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the directory handed to make as BINDIR before the
         ;; 'install' phase runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               #t))))))
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (home-page "https://github.com/OpenGene/fastp/")
    (license license:expat)))
3516
e7c09730
RW
(define-public htslib
  (package
    (name "htslib")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "18bw0mn9pj5wgarnlaxmf1bb8pdqgl1zd6czirqcr62ajpn1xvy0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openssl" ,openssl)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    (home-page "http://www.htslib.org")
    ;; Two licenses apply: the modified BSD license covers the files under
    ;; cram/, and the Expat license covers everything else.
    (license (list license:expat license:bsd-3))))
3545
bca2c576
BW
;; Legacy variant of htslib, not exported.  Remove it as soon as nothing
;; relies upon it any more.
(define htslib-1.3
  (package (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
3559
c4325f62
RW
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1rjdly6daslw66r43g9md8znizlscn1sphycqyldzsidkc4vxqv3"))
       ;; Remove the generated C file from the source.
       (snippet
        '(begin
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    (arguments
     ;; The single test, "test_inv_cdf.py", exercises features that are no
     ;; longer part of this package and asserts False, so it can never pass.
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (home-page "https://github.com/nboley/idr")
    (license license:gpl2+)))
c4325f62 3595
43c565d2
RW
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    ;; The library goes to "out"; the Ruby and Python bindings each get an
    ;; output of their own.
    (outputs '("out" "ruby" "python"))
    (inputs
     `(("htslib" ,htslib)))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (arguments
     `(#:configure-flags
       (let ((ruby-output (assoc-ref %outputs "ruby"))
             (python-output (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby-output)
               (string-append "--enable-python-binding=" python-output)))
       #:phases
       (modify-phases %standard-phases
         ;; The tests are run through generator_manager.hpp, which uses
         ;; either /bin/sh or $SHELL.
         (add-before 'check 'set-SHELL-variable
           (lambda _
             (setenv "SHELL" (which "bash"))
             #t)))))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; As a whole the work is GPLv3 or later; some individual files, for
    ;; example lib/jsoncpp.cpp, carry the Expat license.
    (license (list license:gpl3+ license:expat))))
3649
94ff3157
BW
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (sha256
        (base32
         "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
       (patches (search-patches "khmer-use-libraries.patch"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Delete bundled libraries.
             (delete-file-recursively "third-party/zlib")
             (delete-file-recursively "third-party/bzip2")
             ;; Replace bundled seqan.
             (let* ((seqan-all "third-party/seqan")
                    (seqan-include (string-append
                                    seqan-all "/core/include")))
               (delete-file-recursively seqan-all)
               (copy-recursively (string-append (assoc-ref inputs "seqan")
                                                "/include/seqan")
                                 (string-append seqan-include "/seqan")))
             ;; We do not replace the bundled MurmurHash as the canonical
             ;; repository for this code 'SMHasher' is unsuitable for
             ;; providing a library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; It is simpler to test after installation.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Derive "MAJOR.MINOR" from the version that follows
                    ;; the last dash of the python input's file name.
                    ;; Slicing a fixed number of characters off the end
                    ;; only works when the version is exactly five
                    ;; characters long (it breaks for e.g. "2.7.14").
                    (python (assoc-ref inputs "python"))
                    (python-version
                     (string-drop python
                                  (+ 1 (string-rindex python #\-))))
                    (components (string-split python-version #\.))
                    (major+minor (string-append (car components) "."
                                                (cadr components))))
               ;; Make the installed scripts and modules visible to the
               ;; test run.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" out "/lib/python" major+minor
                                      "/site-packages"))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)
       ;; Tests fail when gcc-5 is used for compilation.  Use gcc-4.9 at least
       ;; until the next version of khmer (likely 2.1) is released.
       ("gcc" ,gcc-4.9)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
3733
b9a601d9
RJ
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bioinformatics-centre/kaiju/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1kdn4rxs0kr9ibmrgrfcci71aa6j6gr71dbc8pff7731rpab6kj7"))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The build is run from the "src" sub-directory.
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               ;; Step back out of "src", then copy the contents of "bin"
               ;; and "util" into the output's bin directory.
               (chdir "..")
               (for-each (lambda (directory)
                           (copy-recursively directory bindir))
                         '("bin" "util"))
               #t))))))
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (home-page "http://kaiju.binf.ku.dk/")
    (license license:gpl3+)))
3772
d57e6d0f
RW
(define-public macs
  (package
    (name "macs")
    (version "2.1.1.20160309")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32 "09ixspd1vcqmz1c81ih70xs4m7qml2iy5vyx1y74zww3iy1vl210"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (arguments
     `(;; MACS is only compatible with Python 2.7.
       #:python ,python-2
       ;; There is no test target.
       #:tests? #f))
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (home-page "https://github.com/taoliu/MACS/")
    (license license:bsd-3)))
3799
41ddebdd
BW
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern has to name
               ;; "f2cl", the same program that the replacement keeps;
               ;; with "2cl" it could not match the text being rewritten.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               ;; NOTE(review): the ")" in front of "#" looks deliberate --
               ;; presumably this line carries a closing parenthesis in the
               ;; upstream Makefile that would otherwise be commented out.
               ;; Confirm against the Makefile before changing it.
               (("^rm -f mafft-distance mafft-distance.exe") ")#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         ;; Use absolute file names for perl, awk and grep in the sources.
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         ;; Wrap every installed program so that the coreutils are found
         ;; on PATH at run time.
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref %build-inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
8fd790eb 3885
84be3b99
MB
(define-public mash
  (package
    (name "mash")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marbl/mash/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "00fx14vpmgsijwxd1xql3if934l82v8ckqgjjyyhnr36qb9qrskv"))
       (modules '((guix build utils)))
       (snippet
        ;; Drop the bundled copy of kseq.
        ;; TODO: Also delete bundled murmurhash and open bloom filter.
        '(begin
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib end up statically embedded in the final
       ;; application, which is why their licenses are listed in the
       ;; license field too.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f ; No tests.
       #:configure-flags
       (let ((capnproto (assoc-ref %build-inputs "capnproto"))
             (gsl (assoc-ref %build-inputs "gsl")))
         (list (string-append "--with-capnp=" capnproto)
               (string-append "--with-gsl=" gsl)))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; The bundled kseq.h was deleted from the source, so include the
         ;; header from htslib instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'autoconf
           (lambda _
             (zero? (system* "autoconf")))))))
    (supported-systems '("x86_64-linux"))
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (home-page "https://mash.readthedocs.io")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
3945
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1hmvdalz3zj5sqqklg0l4npjdv37cv2hsdi1al9iby2ndxjs1b73"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (let ((out (assoc-ref %outputs "out"))
             (boost (assoc-ref %build-inputs "boost")))
         (list (string-append "PREFIX=" out)
               (string-append "BOOST_ROOT=" boost)))
       ;; The build phase already runs the tests.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Include the bam, sam and kseq headers through the "samtools/"
         ;; and "htslib/" prefixes instead of "bam/".
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Make SConstruct look for htslib and samtools in our inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
               #t))))))
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
8fd790eb 4017
318c0aee
MB
(define-public minced
  (package
    (name "minced")
    (version "0.2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/ctSkennerton/minced/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wxmlsapxfpxfd3ps9636h7i2xy6la8i42mwh0j2lsky63h63jp1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (wrapper (string-append bin "/minced")))
               ;; Minced comes with a wrapper script that tries to figure out where
               ;; it is located before running the JAR.  Since these paths are known
               ;; to us, we build our own wrapper to avoid coreutils dependency.
               (install-file "minced.jar" bin)
               (with-output-to-file wrapper
                 (lambda _
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin "/minced.jar \"$@\"\n"))))
               (chmod wrapper #o555)
               ;; The value of 'chmod' is unspecified; report success
               ;; explicitly, like the other phases do.
               #t))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
4073
ddd82e0e
RW
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32 "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Without setuptools the executables do not get installed.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(;; Only Python 2 is supported.
       #:python ,python-2
       ;; There is no "test" target.
       #:tests? #f))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)))  ;for tests
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (license license:gpl2)))
4121
324efb88
BW
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source (origin
              (method url-fetch/tarbomb)
              (uri (string-append
                    "http://www.drive5.com/muscle/muscle_src_"
                    version ".tar.gz"))
              (sha256
               (base32
                "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so just test if it runs.
           (lambda _ (zero? (system* "./muscle" "-version"))))
         ;; Install by copying the "muscle" binary into the output's bin
         ;; directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "muscle" bin)
               ;; The value of 'install-file' is unspecified; report
               ;; success explicitly.
               #t))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
4155
99268755
BW
(define-public newick-utils
  ;; Packaged from a git commit because there are no recent releases.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Run autoreconf on the freshly unpacked checkout.
           (add-after 'unpack 'autoconf
             (lambda _
               (zero? (system* "autoreconf" "-vif")))))))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4194
1e44cf8b
BW
(define-public orfm
  ;; OrfM open reading frame caller, built from the upstream release tarball
  ;; with the stock gnu-build-system phases.
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4219
c033f5d6
BW
(define-public pplacer
  ;; COMMIT is the short revision string substituted for the output of
  ;; 'git describe' in myocamlbuild.ml (see the 'fix-makefile' phase).
  (let ((commit "g807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/matsen/pplacer/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0z1lnd2s8sh6kpzg106wzbh2szw7h0hvq8syd5a6wv4rmyyz6x0f"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.01
         #:findlib ,ocaml4.01-findlib
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Overwrite the files in cdd_src/ with the C sources and headers
           ;; taken from the "cddlib-src" input.
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
                      (local-dir "cddlib_guix"))
                 (mkdir local-dir)
                 (with-directory-excursion local-dir
                   ;; Use 'invoke' so that a failed extraction aborts the
                   ;; build instead of being silently ignored.
                   (invoke "tar" "xvf" cddlib-src))
                 (let ((cddlib-src-folder
                        (string-append local-dir "/"
                                       ;; Third entry of the directory
                                       ;; listing; presumably the single
                                       ;; unpacked top-level directory after
                                       ;; "." and "..".
                                       (list-ref (scandir local-dir) 2)
                                       "/lib-src")))
                   (for-each
                    (lambda (file)
                      (copy-file file
                                 (string-append "cdd_src/" (basename file))))
                    (find-files cddlib-src-folder ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; Install by copying the contents of the build tree's bin/
           ;; directory into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (copy-recursively "bin" bin))
               #t)))))
      (native-inputs
       `(("zlib" ,zlib)
         ("gsl" ,gsl)
         ("ocaml-ounit" ,ocaml4.01-ounit)
         ("ocaml-batteries" ,ocaml4.01-batteries)
         ("ocaml-camlzip" ,ocaml4.01-camlzip)
         ("ocaml-csv" ,ocaml4.01-csv)
         ("ocaml-sqlite3" ,ocaml4.01-sqlite3)
         ("ocaml-xmlm" ,ocaml4.01-xmlm)
         ("ocaml-mcl" ,ocaml4.01-mcl)
         ("ocaml-gsl" ,ocaml4.01-gsl)
         ("cddlib-src" ,(package-source cddlib))))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "http://matsen.fhcrc.org/pplacer")
      (license license:gpl3))))
4302
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The Python scripts live in the "scripts" sub-directory of the
         ;; source tree inherited from 'pplacer'.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _
             (chdir "scripts")
             #t))
         (replace 'check
           ;; 'invoke' raises an error when the test run fails and returns
           ;; #t otherwise.
           (lambda _
             (invoke "python" "-m" "unittest" "discover" "-v")))
         ;; Prefix PATH in the wrappers of the two scripts below with the
         ;; bin/ directories of the "hmmer" and "infernal" inputs.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin:"
                            (assoc-ref inputs "infernal") "/bin")))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin")))
                 (wrap-program (string-append bin "/hrefpkg_query.py")
                   `("PATH" ":" prefix (,path)))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
4342
19ee9201
RW
(define-public python2-pbcore
  ;; Python 2 library fetched from PyPI; uses the stock python-build-system
  ;; phases.
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-sphinx" ,python2-sphinx)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
4370
c61fe02c
RW
(define-public python2-warpedlmm
  ;; Python 2 package distributed on PyPI as a zip archive, hence the
  ;; "unzip" native input.
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       ;; Use 'pypi-uri' like the other PyPI packages in this file rather
       ;; than a hard-coded pypi.python.org URL.  The third argument
       ;; overrides the default ".tar.gz" extension.
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; requires Python 2.7
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
4405
(define-public pbtranscript-tofu
  ;; Packaged from a git commit; the version embeds the first 7 characters
  ;; of the commit hash.
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; remove bundled Cython sources
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; The Python package is built from a sub-directory of the
           ;; checkout.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; With setuptools version 18.0 and later this setup.py hack causes
           ;; a build error, so we disable it.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
4462
024130d2
BW
(define-public prank
  ;; PRANK is built from the "src" sub-directory of the tarball.  There is
  ;; no configure script, and installation is done by hand in a replacement
  ;; 'install' phase.
  (package
    (name "prank")
    (version "150803")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://wasabiapp.org/download/prank/prank.source."
                    version ".tgz"))
              (sha256
               (base32
                "0am4z94fs3w2n5xpfls9zda61vq7qqz4q2i7b9hlsxz5q4j3kfm4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; This phase is also added right after 'unpack, which places it
         ;; *before* 'enter-src-dir; that is why the Makefile is addressed
         ;; as "src/Makefile" here.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           ;; Install the executable and its man page, then wrap the
           ;; executable so that the bin/ directories of the "mafft",
           ;; "exonerate" and "bppsuite" inputs are prefixed to PATH.  The
           ;; inputs are looked up through the phase's INPUTS argument, like
           ;; the other wrapping phases in this file, rather than through
           ;; the global %build-inputs.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1"))
                    (path (string-append
                           (assoc-ref inputs "mafft") "/bin:"
                           (assoc-ref inputs "exonerate") "/bin:"
                           (assoc-ref inputs "bppsuite") "/bin")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
4520
31a9d653
BW
(define-public proteinortho
  ;; Proteinortho has no configure script: the Makefile is patched to
  ;; install into the output, and the installed Perl script is wrapped with
  ;; the build-time PATH.
  (package
    (name "proteinortho")
    (version "5.16b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.bioinf.uni-leipzig.de/Software/proteinortho/proteinortho_v"
             version "_src.tar.gz"))
       (sha256
        (base32
         "1wl0dawpssqwfjvr651r4wlww8hhjin8nba6xh71ks7sbypx886j"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; There is no configure script, so we modify the Makefile directly.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" target "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; The install directory is not created during 'make install'.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p target))
             #t))
         (add-after 'install 'wrap-programs
           ;; Record the PATH in effect during the build in the wrapper of
           ;; the main script.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((search-path (getenv "PATH"))
                   (script (string-append (assoc-ref outputs "out")
                                          "/bin/proteinortho5.pl")))
               (wrap-program script
                 `("PATH" ":" prefix (,search-path))))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)
       ("blast+" ,blast+)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl2+)))
4572
846e3409
RW
(define-public pyicoteo
  ;; Python 2 tool suite fetched from Bitbucket; it has no test suite, so
  ;; tests are disabled.
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/regulatorygenomicsupf/"
                           "pyicoteo/get/v" version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0d6087f29xp8wxwlj111c3sylli98n0l8ry58c51ixzq0zfm50wa"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The enumeration below lists seven tools, so the count in the
    ;; introductory sentence says "seven" to match it.
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
 region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
4610
af860475
BW
(define-public prodigal
  ;; Prodigal has no configure script and no check target; the install
  ;; location is handed to make through INSTALLDIR.
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/hyattpd/Prodigal/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
608dd932 4641
ceb62d54
BW
(define-public roary
  ;; Roary is distributed on CPAN but is not built with the usual Perl
  ;; machinery here: 'configure' and 'build' are deleted, the test files are
  ;; run one by one, and bin/ and lib/ are copied into the output by hand.
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             ;; Run every test file (no early exit) and succeed only when
             ;; the list of failing files is empty.
             (null? (filter (lambda (file)
                              (display file)(display "\n")
                              (not (zero? (system* "perl" file))))
                            (find-files "t" ".*\\.t$")))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every file of the source tree's bin/ directory whose
               ;; name does not end in "R".  'find-files' returns names of
               ;; the form "bin/NAME", which are appended to the output
               ;; directory.  Both variables are set in a single
               ;; 'wrap-program' call.
               (for-each (lambda (prog)
                           (let ((binary (string-append out "/" prog)))
                             (wrap-program binary
                               `("PERL5LIB" ":" prefix
                                 (,(string-append perl5lib ":" out
                                                  "/lib/perl5/site_perl")))
                               `("PATH" ":" prefix
                                 (,(string-append path ":" out "/bin"))))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets R_LIBS_SITE and a PATH that
               ;; contains coreutils instead.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "http://sanger-pathogens.github.io/Roary")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
4757
608dd932
BW
(define-public raxml
  ;; RAxML is built with the upstream Makefile.HYBRID.gcc; there is no
  ;; configure script, no test suite and no install target.
  (package
    (name "raxml")
    (version "8.2.10")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/stamatak/standard-RAxML/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "13s7aspfdcfr6asynwdg1x6vznys6pzap5f8wsffbnnwpkkg9ya8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Install the executable and provide it under the shorter name
           ;; "raxml" as well.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; The second argument of 'symlink' is the name of the link
               ;; to create.  It must be inside BIN; a bare "raxml" would be
               ;; created in the build directory and never reach the output.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "http://sco.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
af860475 4799
66e3eff1
RW
(define-public rsem
  ;; RSEM is built from its plain Makefile.  The bundled copy of Boost is
  ;; removed from the source, and the bundled samtools library is dropped
  ;; from the "all" target.
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                           version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (search-patches "rsem-makefile.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of boost
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           ;; Copy every file whose name matches "rsem-.*" to bin/ and the
           ;; Perl helper module to lib/perl5/site_perl.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (perldir (string-append prefix "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               (for-each (lambda (tool)
                           (install-file tool bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         (add-after 'install 'wrap-program
           ;; Prefix PERL5LIB with the directory holding the helper module
           ;; for each of the listed programs.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (perldir (string-append prefix "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append prefix "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perldir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
4875
8622a072
RW
(define-public rseqc
  ;; RSeQC is a Python 2 package.  The source snippet removes the bundled
  ;; pysam and edits setup.py accordingly.
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; remove dependency on outdated "distribute" module
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; do not use bundled copy of pysam
             (("^have_pysam = False") "have_pysam = True"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
4918
ec946638
RW
(define-public seek
  ;; There are no release tarballs.  According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source (origin
                (method hg-fetch)
                (uri (hg-reference
                      (url "https://bitbucket.org/libsleipnir/sleipnir")
                      (changeset changeset)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         ;; DIRS lists the sub-directories of tools/ that are built and
         ;; installed in addition to the top-level 'make' targets.
         (let ((dirs '("SeekMiner"
                       "SeekEvaluator"
                       "SeekPrep"
                       "Distancer"
                       "Data2DB"
                       "PCL2Bin")))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 (invoke "bash" "gen_auto")))
             ;; Like 'bootstrap, the two phases below use 'invoke', which
             ;; raises an error as soon as one 'make' run fails; on success
             ;; they return #t explicitly.
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               (apply invoke "make" make-flags)))
                           dirs)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each (lambda (dir)
                             (with-directory-excursion
                                 (string-append "tools/" dir)
                               ;; Same flags as the build, followed by the
                               ;; "install" target.
                               (apply invoke "make"
                                      (append make-flags
                                              (list "install")))))
                           dirs)
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
4988
4e10a221
RW
(define-public samtools
  ;; Samtools 1.x, configured against the system htslib.  Besides the
  ;; regular installation, the static library libbam.a and the header files
  ;; found in the top-level build directory are installed too.
  (package
    (name "samtools")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "05myg7bs90i68qbqab9cdg9rqj2xh39azibrx82ipzc5kcfvqhn9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags
       (list "--with-ncurses" "--with-htslib=system")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash")))
             #t))
         (add-after 'install 'install-library
           ;; Install the static library into OUT/lib.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (libdir (string-append out "/lib")))
               (install-file "libbam.a" libdir)
               #t)))
         (add-after 'install 'install-headers
           ;; Install every "*.h" entry of the current directory into
           ;; OUT/include/samtools/.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (incdir (string-append out "/include/samtools/"))
                    (header? (lambda (entry)
                               (string-match "\\.h$" entry))))
               (for-each (lambda (header)
                           (install-file header incdir))
                         (scandir "." header?))
               #t))))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda 5046
0b84a0aa
RW
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools.  The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:make-flags flags)
            `(cons "LIBCURSES=-lncurses" ,flags))
           ((#:phases phases)
            `(modify-phases ,phases
               ;; Phases inherited from samtools that are dropped here.
               (delete 'patch-tests)
               (delete 'configure)
               ;; Install the "samtools" executable by hand.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out    (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t))))))))))
0b84a0aa 5077
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source (origin
                ;; There are no release tarballs nor tags.
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/wanpinglee/MOSAIK.git")
                      (commit commit)))
                ;; Mark the source as a checkout, like the other git-fetch
                ;; origins in this file do.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; No configuration step is run; the build just happens in "src".
           (replace 'configure
             (lambda _ (chdir "src") #t))
           ;; Copy everything from "../bin" (relative to "src") to the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out")
                                         "/bin")))
                 (mkdir-p bin)
                 (copy-recursively "../bin" bin)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5124
282c5087
RW
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.3.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1wiyf4c6nm2j87pv015cbi0qny5byf3pbvcw3likifz5dl56ag40"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.  'invoke' raises an error when the
               ;; script fails and returns #t otherwise.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)))))
         ;; The sources of this package live in the "ngs-sdk" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t)))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
5172
(define-public java-ngs
  ;; Same source and build recipe as ngs-sdk, but built from the "ngs-java"
  ;; sub-directory.
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
5190
75dd2424
RW
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1acn4bv81mfl137qnbn9995mjjhwd36pm0b7qli1iw5skrxa9j8m"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Append "-msse2" right after the matched "CFLAGS =".
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.  'invoke' raises an error when the
               ;; script fails and returns #t otherwise.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Install interface libraries.  On i686 the interface libraries
               ;; are installed to "linux/gcc/i386", so we need to use the Linux
               ;; architecture name ("i386") instead of the target system prefix
               ;; ("i686").
               (mkdir (string-append out "/ilib"))
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 (string-append out "/ilib"))
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir target)
               (install-file "libs/kfg/default.kfg" target)
               (install-file "libs/kfg/certs.kfg" target))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5299
cc6ed477
RW
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ;no "configure" script
         ;; Install the "plink" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5352
dad66da4
RW
(define-public plink-ng
  ;; Fields that are not overridden below come from plink.
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/chrchang/plink-ng/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "09ixrds009aczjswxr2alcb774mksq5g0v78dgjjn1h4dky0kf9a"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; no "configure" script
         ;; Build from the "1.9" sub-directory.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))
       #:make-flags (list "BLASFLAGS=-llapack -lopenblas"
                          "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
                          "ZLIB=-lz"
                          "-f" "Makefile.std")))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
5389
c6a24d6e
RW
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))     ;for 'cut'
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; The samtools input installs its headers under
               ;; "include/samtools/".  The dot is escaped so that only the
               ;; literal file name "sam.h" is rewritten.
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           ;; Install the object files and the C++ headers by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (lib     (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
5442
56e373ef
RW
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/"
                                  "preseq/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Remove bundled samtools.
                          (delete-file-recursively "samtools")
                          #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; Point the Makefile at the samtools and smithlab-cpp inputs.
       (let ((out      (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
5492
9ded1457
BW
(define-public python-screed
  (package
    (name "python-screed")
    (version "0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Append the current directory to the module search path
             ;; before running the tests.
             (setenv "PYTHONPATH"
                     (string-append (getenv "PYTHONPATH") ":."))
             ;; Skip the tests tagged "known_failing".  'invoke' raises an
             ;; error when the test run fails and returns #t otherwise.
             (invoke "nosetests" "--attr" "!known_failing"))))))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
5523
;; Python 2 variant, derived from python-screed.
(define-public python2-screed
  (package-with-python2 python-screed))
9ded1457 5526
51c64999
RW
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.8.2-1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1camsijmvv2s45mb4iyf44ghl4gkd4rl0viphpcgl3ccchy32a0g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       ;; Point the build at the configuration files and libraries of the
       ;; ncbi-vdb input.
       (list (string-append "DEFAULT_CRT="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/certs.kfg")
             (string-append "DEFAULT_KFG="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            "/kfg/default.kfg")
             (string-append "VDB_LIBDIR="
                            (assoc-ref %build-inputs "ncbi-vdb")
                            ,(if (string-prefix? "x86_64"
                                                 (or (%current-target-system)
                                                     (%current-system)))
                                 "/lib64"
                                 "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources and
             ;; raw build output of ncbi-vdb, including files that are not
             ;; installed.  Since we are building against an installed version of
             ;; ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               ;; NOTE(review): the whitespace after "^" must match the
               ;; indentation used in konfigure.perl exactly -- confirm
               ;; against the upstream file.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.  'invoke' raises an error when the
             ;; script fails and returns #t otherwise.
             (invoke "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     ;; (string-append "--with-xml2-prefix="
                     ;;                (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5")))))
         ;; This version of sra-tools fails to build with glibc because of a
         ;; naming conflict.  glibc-2.25/include/bits/mathcalls.h already
         ;; contains a definition of "canonicalize", so we rename it.
         ;;
         ;; See upstream bug report:
         ;; https://github.com/ncbi/sra-tools/issues/67
         (add-after 'unpack 'patch-away-glibc-conflict
           (lambda _
             (substitute* "tools/bam-loader/bam.c"
               (("canonicalize\\(" line)
                (string-append "sra_tools_" line)))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
5637
d3517eda
RW
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball     (assoc-ref %build-inputs "source"))
                ;; "tar" and "bzip2" are the only programs put on PATH.
                (search-path (string-append
                              (assoc-ref %build-inputs "tar") "/bin:"
                              (assoc-ref %build-inputs "bzip2") "/bin"))
                (include-dir (string-append (assoc-ref %outputs "out")
                                            "/include"))
                (share-dir   (string-append (assoc-ref %outputs "doc")
                                            "/share")))
           (setenv "PATH" search-path)
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", the "share" tree goes to "doc".
           (copy-recursively "include" include-dir)
           (copy-recursively "share" share-dir)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5 5681
d708b7a9
BW
(define-public seqmagick
  ;; Built with the default python-build-system phases.
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32
         "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-biopython" ,python-biopython)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
5707
66daf78c
BW
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/seqtk/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0ywdyzpmfiz2wp6ampbzqg4y8bj450nfgqarpamg045b8mk32lxx"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove extraneous header files, as is done in the seqtk
                  ;; master branch.
                  (for-each delete-file
                            (list "ksort.h" "kstring.h" "kvec.h"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.  'invoke'
           ;; raises an error when the command fails and returns #t otherwise.
           (lambda _ (invoke "./seqtk" "seq")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               ;; Return #t explicitly, like the other phases in this file.
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
5750
5f7e17be
BW
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the "unit_tests" program.  'invoke' raises an error when it
         ;; fails and returns #t otherwise.
         (replace 'check (lambda _ (invoke "./unit_tests")))
         ;; Install the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    (native-inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
5791
bcadaf00
BW
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1ghaghvd82af9j5adavxh77g7hm247d1r69m3fbi6f1jdivj5ldk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (dbdir  (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases"))
                    (fastas (find-files "rRNA_databases" ".*fasta")))
               ;; The two programs go to "out" ...
               (install-file "sortmerna" bindir)
               (install-file "indexdb_rna" bindir)
               ;; ... and the FASTA databases go to "db".
               (for-each (lambda (fasta)
                           (install-file fasta dbdir))
                         fastas)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
5838
ce7155d5
RW
5839(define-public star ; STAR RNA-seq aligner, built from an upstream Git checkout at the tag named by VERSION
5840 (package
5841 (name "star")
6d095c4c 5842 (version "2.6.0c")
ce7155d5 5843 (source (origin
6d095c4c
RW
5844 (method git-fetch)
5845 (uri (git-reference
5846 (url "https://github.com/alexdobin/STAR.git")
5847 (commit version)))
5848 (file-name (string-append name "-" version "-checkout"))
ce7155d5
RW
5849 (sha256
5850 (base32
6d095c4c 5851 "04cj6jw8d9q6lk9c78wa4fky6jdlicf1d13plq7182h8vqiz8p59"))
ce7155d5
RW
5852 (modules '((guix build utils)))
5853 (snippet
3062d750
RW
5854 '(begin
5855 (substitute* "source/Makefile"
5856 (("/bin/rm") "rm")) ; call "rm" by name rather than by the absolute path /bin/rm
5857 ;; Remove pre-built binaries and bundled htslib sources.
5858 (delete-file-recursively "bin/MacOSX_x86_64")
5859 (delete-file-recursively "bin/Linux_x86_64")
c9e9154e 5860 (delete-file-recursively "bin/Linux_x86_64_static")
3062d750
RW
5861 (delete-file-recursively "source/htslib")
5862 #t))))
ce7155d5
RW
5863 (build-system gnu-build-system)
5864 (arguments
5865 '(#:tests? #f ;no check target
5866 #:make-flags '("STAR") ; "STAR" is the only argument passed to make
5867 #:phases
c0266e8d
RW
5868 (modify-phases %standard-phases
5869 (add-after 'unpack 'enter-source-dir
5870 (lambda _ (chdir "source") #t)) ; the Makefile patched below lives in "source/"
625cdc3f
RW
5871 (add-after 'enter-source-dir 'make-reproducible
5872 (lambda _
5873 (substitute* "Makefile" ; replace the COMPILATION_TIME_PLACE value with a fixed string
5874 (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
5875 (string-append pre "Built with Guix" post)))
 ;; Return #t explicitly, like the other phases of this package do.
 #t))
3062d750
RW
5876 (add-after 'enter-source-dir 'do-not-use-bundled-htslib
5877 (lambda _
5878 (substitute* "Makefile" ; drop "htslib" from the Depend.list prerequisites
5879 (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
5880 _ prefix) prefix))
5881 (substitute* '("BAMfunctions.cpp" ; turn quoted "htslib/..." includes into <...> includes
5882 "signalFromBAM.h"
5883 "bam_cat.h"
5884 "bam_cat.c"
5885 "STAR.cpp"
5886 "bamRemoveDuplicates.cpp")
5887 (("#include \"htslib/([^\"]+\\.h)\"" _ header)
5888 (string-append "#include <" header ">")))
5889 (substitute* "IncludeDefine.h"
 ;; The dot before "h" is escaped so it only matches a literal ".h"
 ;; suffix, consistent with the pattern used for the other files above.
5890 (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
5891 (string-append "<" header ">")))
5892 #t))
c0266e8d
RW
5893 (replace 'install ; install only the "STAR" executable into <out>/bin
5894 (lambda* (#:key outputs #:allow-other-keys)
5895 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
5896 (install-file "STAR" bin))
5897 #t))
5898 (delete 'configure))))
ce7155d5 5899 (native-inputs
9fc513ad 5900 `(("xxd" ,xxd)))
ce7155d5 5901 (inputs
3062d750
RW
5902 `(("htslib" ,htslib) ; replaces the bundled copy deleted in the source snippet
5903 ("zlib" ,zlib)))
ce7155d5
RW
5904 (home-page "https://github.com/alexdobin/STAR")
5905 (synopsis "Universal RNA-seq aligner")
5906 (description
5907 "The Spliced Transcripts Alignment to a Reference (STAR) software is
5908based on a previously undescribed RNA-seq alignment algorithm that uses
5909sequential maximum mappable seed search in uncompressed suffix arrays followed
5910by seed clustering and stitching procedure. In addition to unbiased de novo
5911detection of canonical junctions, STAR can discover non-canonical splices and
5912chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
5913sequences.")
c11f79a4
BW
5914 ;; Only 64-bit systems are supported according to the README.
5915 (supported-systems '("x86_64-linux" "mips64el-linux"))
ce7155d5
RW
5916 ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
5917 (license license:gpl3+)))
de07c0db 5918
dbf4ed7c
RW
5919(define-public subread ; Subread read-alignment tool kit, built from the SourceForge source tarball
5920 (package
5921 (name "subread")
e6debcaf 5922 (version "1.6.0")
dbf4ed7c
RW
5923 (source (origin
5924 (method url-fetch)
de67e922
LF
5925 (uri (string-append "mirror://sourceforge/subread/subread-"
5926 version "/subread-" version "-source.tar.gz"))
dbf4ed7c
RW
5927 (sha256
5928 (base32
e6debcaf 5929 "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
dbf4ed7c
RW
5930 (build-system gnu-build-system)
5931 (arguments
5932 `(#:tests? #f ;no "check" target
104c1986
RW
5933 ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
5934 ;; optimizations by default, so we override these flags such that x86_64
5935 ;; flags are only added when the build target is an x86_64 system.
5936 #:make-flags
5937 (list (let ((system ,(or (%current-target-system) ; cross-compilation target if set, else the native system
5938 (%current-system)))
5939 (flags '("-ggdb" "-fomit-frame-pointer"
5940 "-ffast-math" "-funroll-loops"
5941 "-fmessage-length=0"
5942 "-O9" "-Wall" "-DMAKE_FOR_EXON"
5943 "-DMAKE_STANDALONE"
5944 "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
5945 (flags64 '("-mmmx" "-msse" "-msse2" "-msse3"))) ; appended only on x86_64
5946 (if (string-prefix? "x86_64" system)
5947 (string-append "CCFLAGS=" (string-join (append flags flags64)))
5948 (string-append "CCFLAGS=" (string-join flags))))
5949 "-f" "Makefile.Linux"
5950 "CC=gcc ${CCFLAGS}")
dbf4ed7c 5951 #:phases
dc1d3cde
KK
5952 (modify-phases %standard-phases
5953 (add-after 'unpack 'enter-dir
5954 (lambda _ (chdir "src") #t))
5955 (replace 'install
5956 (lambda* (#:key outputs #:allow-other-keys)
5957 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
5958 (mkdir-p bin)
 ;; The phases run from "src" (see 'enter-dir), so "../bin" is the
 ;; "bin" directory next to it; its whole content is copied.
5959 (copy-recursively "../bin" bin)
 ;; Return #t explicitly, as the 'enter-dir phase above does, instead
 ;; of leaking the unspecified value of copy-recursively.
 #t)))
dbf4ed7c 5960 ;; no "configure" script
dc1d3cde 5961 (delete 'configure))))
dbf4ed7c
RW
5962 (inputs `(("zlib" ,zlib)))
5963 (home-page "http://bioinf.wehi.edu.au/subread-package/")
5964 (synopsis "Tool kit for processing next-gen sequencing data")
5965 (description
5966 "The subread package contains the following tools: subread aligner, a
5967general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
5968and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
5969features; exactSNP: a SNP caller that discovers SNPs by testing signals
5970against local background noises.")
5971 (license license:gpl3+)))
5972
d15d981e
RW
5973(define-public stringtie ; StringTie transcript assembler, built against the packaged samtools 0.1
5974 (package
5975 (name "stringtie")
5976 (version "1.2.1")
5977 (source (origin
5978 (method url-fetch)
5979 (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
5980 "stringtie-" version ".tar.gz"))
5981 (sha256
5982 (base32
5983 "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
5984 (modules '((guix build utils)))
5985 (snippet
5986 '(begin
5987 (delete-file-recursively "samtools-0.1.18") ; drop the bundled samtools copy
5988 #t))))
5989 (build-system gnu-build-system)
5990 (arguments
5991 `(#:tests? #f ;no test suite
5992 #:phases
5993 (modify-phases %standard-phases
5994 ;; no configure script
5995 (delete 'configure)
5996 (add-before 'build 'use-system-samtools
5997 (lambda _
5998 (substitute* "Makefile" ; remove the libbam.a prerequisite of the "stringtie" target
5999 (("stringtie: \\$\\{BAM\\}/libbam\\.a")
6000 "stringtie: "))
6001 (substitute* '("gclib/GBam.h"
6002 "gclib/GBam.cpp")
 ;; Rewrite quoted includes of bam.h, sam.h and kstring.h to
 ;; <samtools/...>.  The dot is escaped so that only a literal
 ;; ".h" suffix matches, as with "libbam\\.a" above.
6003 (("#include \"(bam|sam|kstring)\\.h\"" _ header)
6004 (string-append "#include <samtools/" header ".h>")))
6005 #t))
0d2c0562
RW
6006 (add-after 'unpack 'remove-duplicate-typedef
6007 (lambda _
6008 ;; This typedef conflicts with the typedef in
6009 ;; glibc-2.25/include/bits/types.h
6010 (substitute* "gclib/GThreads.h"
6011 (("typedef long long __intmax_t;") ""))
6012 #t))
d15d981e
RW
6013 (replace 'install ; install only the "stringtie" executable into <out>/bin
6014 (lambda* (#:key outputs #:allow-other-keys)
6015 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
6016 (install-file "stringtie" bin)
6017 #t))))))
6018 (inputs
6019 `(("samtools" ,samtools-0.1)
6020 ("zlib" ,zlib)))
6021 (home-page "http://ccb.jhu.edu/software/stringtie/")
6022 (synopsis "Transcript assembly and quantification for RNA-Seq data")
6023 (description
6024 "StringTie is a fast and efficient assembler of RNA-Seq sequence
6025alignments into potential transcripts. It uses a novel network flow algorithm
6026as well as an optional de novo assembly step to assemble and quantitate
6027full-length transcripts representing multiple splice variants for each gene
6028locus. Its input can include not only the alignments of raw reads used by
6029other transcript assemblers, but also alignments of longer sequences that have
6030been assembled from those reads. To identify differentially expressed genes
6031between experiments, StringTie's output can be processed either by the
6032Cuffdiff or Ballgown programs.")
6033 (license license:artistic2.0)))
6034
ad0ae297
BW
6035(define-public taxtastic ; Taxtastic, fetched from PyPI and built with Python 2
6036 (package
6037 (name "taxtastic")
d0c75e73 6038 (version "0.8.5")
ad0ae297
BW
6039 (source (origin
6040 (method url-fetch)
3cbfc149 6041 (uri (pypi-uri "taxtastic" version))
ad0ae297
BW
6042 (sha256
6043 (base32
d0c75e73 6044 "03pysw79lsrvz4lwzis88j15067ffqbi4cid5pqhrlxmd6bh8rrk"))))
ad0ae297
BW
6045 (build-system python-build-system)
6046 (arguments
6047 `(#:python ,python-2 ; all propagated inputs below are the python2- variants
6048 #:phases
6049 (modify-phases %standard-phases
6050 (replace 'check ; run unittest discovery in place of the standard check phase
6051 (lambda _
6052 (zero? (system* "python" "-m" "unittest" "discover" "-v"))))))) ; #t only when the exit status is 0
6053 (propagated-inputs
6054 `(("python-sqlalchemy" ,python2-sqlalchemy)
6055 ("python-decorator" ,python2-decorator)
6056 ("python-biopython" ,python2-biopython)
3f8c82e6
BW
6057 ("python-pandas" ,python2-pandas)
6058 ("python-psycopg2" ,python2-psycopg2)
6059 ("python-fastalite" ,python2-fastalite)
6060 ("python-pyyaml" ,python2-pyyaml)
6061 ("python-six" ,python2-six)
6062 ("python-jinja2" ,python2-jinja2)
6063 ("python-dendropy" ,python2-dendropy)))
ad0ae297
BW
6064 (home-page "https://github.com/fhcrc/taxtastic")
6065 (synopsis "Tools for taxonomic naming and annotation")
6066 (description
6067 "Taxtastic is software written in python used to build and maintain
6068reference packages i.e. collections of reference trees, reference alignments,
6069profiles, and associated taxonomic information.")
6070 (license license:gpl3+)))
6071
de07c0db
RW
6072(define-public vcftools ; VCFtools, built from the GitHub release tarball
6073 (package
6074 (name "vcftools")
f4322542 6075 (version "0.1.15")
de07c0db
RW
6076 (source (origin
6077 (method url-fetch)
6078 (uri (string-append
9b36e256
RJ
6079 "https://github.com/vcftools/vcftools/releases/download/v"
6080 version "/vcftools-" version ".tar.gz"))
de07c0db
RW
6081 (sha256
6082 (base32
f4322542 6083 "1qw30c45wihgy632rbz4rh3njnwj4msj46l1rsgdhyg6bgypmr1i"))))
de07c0db
RW
6084 (build-system gnu-build-system)
6085 (arguments
6086 `(#:tests? #f ; no "check" target
6087 #:make-flags (list
7c3958e1 6088 "CFLAGS=-O2" ; override "-m64" flag
de07c0db
RW
6089 (string-append "PREFIX=" (assoc-ref %outputs "out")) ; install under the "out" output
6090 (string-append "MANDIR=" (assoc-ref %outputs "out") ; manual pages go to share/man/man1 of "out"
9b36e256
RJ
6091 "/share/man/man1"))))
6092 (native-inputs
6093 `(("pkg-config" ,pkg-config)))
de07c0db
RW
6094 (inputs
6095 `(("perl" ,perl)
6096 ("zlib" ,zlib)))
9b36e256 6097 (home-page "https://vcftools.github.io/")
de07c0db
RW
6098 (synopsis "Tools for working with VCF files")
6099 (description
6100 "VCFtools is a program package designed for working with VCF files, such
6101as those generated by the 1000 Genomes Project. The aim of VCFtools is to
6102provide easily accessible methods for working with complex genetic variation
6103data in the form of VCF files.")
6104 ;; The license is declared as LGPLv3 in the README and
9b36e256 6105 ;; at https://vcftools.github.io/license.html
de07c0db 6106 (license license:lgpl3)))
9c38b540 6107
35aa90a1
RW
6108(define-public infernal ; Infernal, built from the eddylab.org release tarball with default phases
6109 (package
6110 (name "infernal")
6111 (version "1.1.2")
6112 (source (origin
6113 (method url-fetch)
6114 (uri (string-append "http://eddylab.org/software/infernal/"
6115 "infernal-" version ".tar.gz"))
6116 (sha256
6117 (base32
6118 "0sr2hiz3qxfwqpz3whxr6n82p3x27336v3f34iqznp10hks2935c"))))
6119 (build-system gnu-build-system)
6120 (native-inputs
6121 `(("perl" ,perl))) ; for tests
6122 (home-page "http://eddylab.org/infernal/")
6123 (synopsis "Inference of RNA alignments")
6124 (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
6125searching DNA sequence databases for RNA structure and sequence similarities.
6126It is an implementation of a special case of profile stochastic context-free
6127grammars called @dfn{covariance models} (CMs). A CM is like a sequence
6128profile, but it scores a combination of sequence consensus and RNA secondary
6129structure consensus, so in many cases, it is more capable of identifying RNA
6130homologs that conserve their secondary structure more than their primary
6131sequence.")
48409ef2
EF
6132 ;; Infernal 1.1.2 requires VMX or SSE capability for parallel instructions.
6133 (supported-systems '("i686-linux" "x86_64-linux")) ; restricted to the two x86 systems
35aa90a1
RW
6134 (license license:bsd-3)))
6135
b91cfa22
RW
6136(define-public r-centipede ; R package CENTIPEDE, fetched from R-Forge
6137 (package
6138 (name "r-centipede")
6139 (version "1.2")
6140 (source (origin
6141 (method url-fetch)
6142 (uri (string-append "http://download.r-forge.r-project.org/" ; explicit URL, no cran-uri/bioconductor-uri helper
6143 "src/contrib/CENTIPEDE_" version ".tar.gz"))
6144 (sha256
6145 (base32
6146 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
6147 (build-system r-build-system)
6148 (home-page "http://centipede.uchicago.edu/")
6149 (synopsis "Predict transcription factor binding sites")
6150 (description
6151 "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
6152of the genome that are bound by particular transcription factors. It starts
6153by identifying a set of candidate binding sites, and then aims to classify the
6154sites according to whether each site is bound or not bound by a transcription
6155factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
6156between two different types of motif instances using as much relevant
6157information as possible.")
6158 (license (list license:gpl2+ license:gpl3+)))) ; two licenses are recorded for this package
6159
7b3df1e5
BW
6160(define-public r-vegan ; R package "vegan", fetched from CRAN
6161 (package
6162 (name "r-vegan")
3ca31735 6163 (version "2.5-2")
7b3df1e5
BW
6164 (source
6165 (origin
6166 (method url-fetch)
6167 (uri (cran-uri "vegan" version))
6168 (sha256
6169 (base32
3ca31735 6170 "13pyqvlpz64qibi8k5d109v7q09j06mbv6ndix3n4cn21mjx391c"))))
7b3df1e5 6171 (build-system r-build-system)
7b3df1e5 6172 (native-inputs
db2e4386 6173 `(("gfortran" ,gfortran))) ; build-time only; presumably for Fortran sources -- TODO confirm
7b3df1e5
BW
6174 (propagated-inputs
6175 `(("r-cluster" ,r-cluster)
3ca31735 6176 ("r-knitr" ,r-knitr) ; needed for vignettes
7b3df1e5 6177 ("r-lattice" ,r-lattice)
aeb64f3c 6178 ("r-mass" ,r-mass)
7b3df1e5
BW
6179 ("r-mgcv" ,r-mgcv)
6180 ("r-permute" ,r-permute)))
6181 (home-page "https://cran.r-project.org/web/packages/vegan")
6182 (synopsis "Functions for community ecology")
6183 (description
6184 "The vegan package provides tools for descriptive community ecology. It
6185has most basic functions of diversity analysis, community ordination and
6186dissimilarity analysis. Most of its multivariate tools can be used for other
6187data types as well.")
6188 (license license:gpl2+)))
6189
8c6de588
RW
6190(define-public r-annotate ; R package "annotate", fetched from Bioconductor
6191 (package
6192 (name "r-annotate")
02be8180 6193 (version "1.58.0")
8c6de588
RW
6194 (source
6195 (origin
6196 (method url-fetch)
6197 (uri (bioconductor-uri "annotate" version))
6198 (sha256
6199 (base32
02be8180 6200 "1qmncyvy147a1ll3iri45p822kcs3s7583jfnq9jf6sz9ilk8cjf"))))
8c6de588
RW
6201 (build-system r-build-system)
6202 (propagated-inputs ; R packages made available together with this one
6203 `(("r-annotationdbi" ,r-annotationdbi)
6204 ("r-biobase" ,r-biobase)
6205 ("r-biocgenerics" ,r-biocgenerics)
6206 ("r-dbi" ,r-dbi)
d0f0579e 6207 ("r-rcurl" ,r-rcurl)
8c6de588
RW
6208 ("r-xml" ,r-xml)
6209 ("r-xtable" ,r-xtable)))
6210 (home-page
5713bbf1 6211 "https://bioconductor.org/packages/annotate")
8c6de588 6212 (synopsis "Annotation for microarrays")
d1e4ad1b 6213 (description "This package provides R environments for the annotation of
8c6de588
RW
6214microarrays.")
6215 (license license:artistic2.0)))
6216
efa6a1dd
RJ
6217(define-public r-copynumber ; R package "copynumber", fetched from Bioconductor
6218 (package
6219 (name "r-copynumber")
14e9235f 6220 (version "1.20.0")
efa6a1dd
RJ
6221 (source (origin
6222 (method url-fetch)
6223 (uri (bioconductor-uri "copynumber" version))
6224 (sha256
6225 (base32
14e9235f 6226 "0y9nnwb0psphp3ix88wj2f8z5gr45r5znf55w892ysm27isdpmms"))))
efa6a1dd
RJ
6227 (build-system r-build-system)
6228 (propagated-inputs ; R packages made available together with this one
6229 `(("r-s4vectors" ,r-s4vectors)
6230 ("r-iranges" ,r-iranges)
6231 ("r-genomicranges" ,r-genomicranges)
6232 ("r-biocgenerics" ,r-biocgenerics)))
6233 (home-page "https://bioconductor.org/packages/copynumber")
6234 (synopsis "Segmentation of single- and multi-track copy number data")
6235 (description
6236 "This package segments single- and multi-track copy number data by a
6237penalized least squares regression method.")
6238 (license license:artistic2.0)))
6239
07a664cd
RW
6240(define-public r-geneplotter ; R package "geneplotter", fetched from Bioconductor
6241 (package
6242 (name "r-geneplotter")
4d70c8c4 6243 (version "1.58.0")
07a664cd
RW
6244 (source
6245 (origin
6246 (method url-fetch)
6247 (uri (bioconductor-uri "geneplotter" version))
6248 (sha256
6249 (base32
4d70c8c4 6250 "055g28xgiazl4l0gkg8xiamks64f5yckjjyvw1abd6d6qjavwx0g"))))
07a664cd
RW
6251 (build-system r-build-system)
6252 (propagated-inputs ; R packages made available together with this one
6253 `(("r-annotate" ,r-annotate)
6254 ("r-annotationdbi" ,r-annotationdbi)
6255 ("r-biobase" ,r-biobase)
6256 ("r-biocgenerics" ,r-biocgenerics)
6257 ("r-lattice" ,r-lattice)
6258 ("r-rcolorbrewer" ,r-rcolorbrewer)))
5713bbf1 6259 (home-page "https://bioconductor.org/packages/geneplotter")
07a664cd
RW
6260 (synopsis "Graphics functions for genomic data")
6261 (description
6262 "This package provides functions for plotting genomic data.")
6263 (license license:artistic2.0)))
6264
2301fd3e
RW
6265(define-public r-genefilter ; R package "genefilter", fetched from Bioconductor
6266 (package
6267 (name "r-genefilter")
41657b15 6268 (version "1.62.0")
2301fd3e
RW
6269 (source
6270 (origin
6271 (method url-fetch)
6272 (uri (bioconductor-uri "genefilter" version))
6273 (sha256
6274 (base32
41657b15 6275 "14l0ff02spmjwxj0m1czhg5vlkgwcfi73cym8m2n9vn6i7bjdaqi"))))
2301fd3e
RW
6276 (build-system r-build-system)
6277 (native-inputs
6278 `(("gfortran" ,gfortran))) ; build-time only; presumably for Fortran sources -- TODO confirm
6279 (propagated-inputs
6280 `(("r-annotate" ,r-annotate)
6281 ("r-annotationdbi" ,r-annotationdbi)
6282 ("r-biobase" ,r-biobase)
aeb64f3c
RW
6283 ("r-s4vectors" ,r-s4vectors)
6284 ("r-survival" ,r-survival)))
5713bbf1 6285 (home-page "https://bioconductor.org/packages/genefilter")
2301fd3e
RW
6286 (synopsis "Filter genes from high-throughput experiments")
6287 (description
6288 "This package provides basic functions for filtering genes from
6289high-throughput sequencing experiments.")
6290 (license license:artistic2.0)))
6291
ad34f0ac
RW
6292(define-public r-deseq2 ; R package "DESeq2", fetched from Bioconductor
6293 (package
6294 (name "r-deseq2")
51c1f96e 6295 (version "1.20.0")
ad34f0ac
RW
6296 (source
6297 (origin
6298 (method url-fetch)
6299 (uri (bioconductor-uri "DESeq2" version))
6300 (sha256
6301 (base32
51c1f96e 6302 "1wjnfpb41a9mmf9a22bz4zh7r1d4id50vpdc1mn5vfzrz7li9qik"))))
ad34f0ac
RW
6303 (properties `((upstream-name . "DESeq2"))) ; records the upstream spelling, which differs from NAME
6304 (build-system r-build-system)
ad34f0ac
RW
6305 (propagated-inputs
6306 `(("r-biobase" ,r-biobase)
6307 ("r-biocgenerics" ,r-biocgenerics)
6308 ("r-biocparallel" ,r-biocparallel)
6309 ("r-genefilter" ,r-genefilter)
6310 ("r-geneplotter" ,r-geneplotter)
6311 ("r-genomicranges" ,r-genomicranges)
6312 ("r-ggplot2" ,r-ggplot2)
6313 ("r-hmisc" ,r-hmisc)
6314 ("r-iranges" ,r-iranges)
6315 ("r-locfit" ,r-locfit)
6316 ("r-rcpp" ,r-rcpp)
6317 ("r-rcpparmadillo" ,r-rcpparmadillo)
6318 ("r-s4vectors" ,r-s4vectors)
6319 ("r-summarizedexperiment" ,r-summarizedexperiment)))
5713bbf1 6320 (home-page "https://bioconductor.org/packages/DESeq2")
ad34f0ac
RW
6321 (synopsis "Differential gene expression analysis")
6322 (description
6323 "This package provides functions to estimate variance-mean dependence in
6324count data from high-throughput nucleotide sequencing assays and test for
6325differential expression based on a model using the negative binomial
6326distribution.")
6327 (license license:lgpl3+)))
6328
86763fdd
RW
6329(define-public r-dexseq ; R package "DEXSeq", fetched from Bioconductor
6330 (package
6331 (name "r-dexseq")
16da69b5 6332 (version "1.26.0")
86763fdd
RW
6333 (source
6334 (origin
6335 (method url-fetch)
6336 (uri (bioconductor-uri "DEXSeq" version))
6337 (sha256
6338 (base32
16da69b5 6339 "1mqb3mdxcsi3largsl7k27bvqrgps9ixv806xvmf29pw0xn05sg1"))))
86763fdd
RW
6340 (properties `((upstream-name . "DEXSeq"))) ; records the upstream spelling, which differs from NAME
6341 (build-system r-build-system)
6342 (propagated-inputs
6343 `(("r-annotationdbi" ,r-annotationdbi)
6344 ("r-biobase" ,r-biobase)
6345 ("r-biocgenerics" ,r-biocgenerics)
6346 ("r-biocparallel" ,r-biocparallel)
6347 ("r-biomart" ,r-biomart)
6348 ("r-deseq2" ,r-deseq2)
6349 ("r-genefilter" ,r-genefilter)
6350 ("r-geneplotter" ,r-geneplotter)
6351 ("r-genomicranges" ,r-genomicranges)
6352 ("r-hwriter" ,r-hwriter)
6353 ("r-iranges" ,r-iranges)
6354 ("r-rcolorbrewer" ,r-rcolorbrewer)
6355 ("r-rsamtools" ,r-rsamtools)
6356 ("r-s4vectors" ,r-s4vectors)
6357 ("r-statmod" ,r-statmod)
6358 ("r-stringr" ,r-stringr)
6359 ("r-summarizedexperiment" ,r-summarizedexperiment)))
5713bbf1 6360 (home-page "https://bioconductor.org/packages/DEXSeq")
86763fdd
RW
6361 (synopsis "Inference of differential exon usage in RNA-Seq")
6362 (description
6363 "This package is focused on finding differential exon usage using RNA-seq
6364exon counts between samples with different experimental designs. It provides
6365functions that allows the user to make the necessary statistical tests based
6366on a model that uses the negative binomial distribution to estimate the
6367variance between biological replicates and generalized linear models for
6368testing. The package also provides functions for the visualization and
6369exploration of the results.")
6370 (license license:gpl3+)))
6371
e8163773
RW
6372(define-public r-annotationforge ; R package "AnnotationForge", fetched from Bioconductor
6373 (package
6374 (name "r-annotationforge")
ce0e762b 6375 (version "1.22.2")
e8163773
RW
6376 (source
6377 (origin
6378 (method url-fetch)
6379 (uri (bioconductor-uri "AnnotationForge" version))
6380 (sha256
6381 (base32
ce0e762b 6382 "17kmy7nvpyyj6w5jyrjciw87rydmmmc8q6cnwqjv1j7li9bp09gr"))))
e8163773
RW
6383 (properties
6384 `((upstream-name . "AnnotationForge"))) ; records the upstream spelling, which differs from NAME
6385 (build-system r-build-system)
6386 (propagated-inputs
6387 `(("r-annotationdbi" ,r-annotationdbi)
6388 ("r-biobase" ,r-biobase)
6389 ("r-biocgenerics" ,r-biocgenerics)
6390 ("r-dbi" ,r-dbi)
55cd914c 6391 ("r-rcurl" ,r-rcurl)
e8163773
RW
6392 ("r-rsqlite" ,r-rsqlite)
6393 ("r-s4vectors" ,r-s4vectors)
6394 ("r-xml" ,r-xml)))
5713bbf1 6395 (home-page "https://bioconductor.org/packages/AnnotationForge")
e8163773
RW
6396 (synopsis "Code for building annotation database packages")
6397 (description
6398 "This package provides code for generating Annotation packages and their
6399databases. Packages produced are intended to be used with AnnotationDbi.")
6400 (license license:artistic2.0)))
6401
cd9e7dc7
RW
6402(define-public r-rbgl ; R package "RBGL", fetched from Bioconductor
6403 (package
6404 (name "r-rbgl")
9177ac22 6405 (version "1.56.0")
cd9e7dc7
RW
6406 (source
6407 (origin
6408 (method url-fetch)
6409 (uri (bioconductor-uri "RBGL" version))
6410 (sha256
6411 (base32
9177ac22 6412 "0hj972mmqpyi5fx1rq33kysavdyz4nspi6gcffzi3rv339m0anhf"))))
cd9e7dc7
RW
6413 (properties `((upstream-name . "RBGL"))) ; records the upstream spelling, which differs from NAME
6414 (build-system r-build-system)
6415 (propagated-inputs `(("r-graph" ,r-graph))) ; the only R dependency declared here
5713bbf1 6416 (home-page "https://www.bioconductor.org/packages/RBGL")
cd9e7dc7
RW
6417 (synopsis "Interface to the Boost graph library")
6418 (description
6419 "This package provides a fairly extensive and comprehensive interface to
6420the graph algorithms contained in the Boost library.")
6421 (license license:artistic2.0)))
6422
ad740ff8
RW
6423(define-public r-gseabase ; R package "GSEABase", fetched from Bioconductor
6424 (package
6425 (name "r-gseabase")
f1094f50 6426 (version "1.42.0")
ad740ff8
RW
6427 (source
6428 (origin
6429 (method url-fetch)
6430 (uri (bioconductor-uri "GSEABase" version))
6431 (sha256
6432 (base32
f1094f50 6433 "11bv92svik399q677jv96b71i4bq68xxyxn1yijpdik2lq4hgl7a"))))
ad740ff8
RW
6434 (properties `((upstream-name . "GSEABase"))) ; records the upstream spelling, which differs from NAME
6435 (build-system r-build-system)
6436 (propagated-inputs
6437 `(("r-annotate" ,r-annotate)
6438 ("r-annotationdbi" ,r-annotationdbi)
6439 ("r-biobase" ,r-biobase)
6440 ("r-biocgenerics" ,r-biocgenerics)
6441 ("r-graph" ,r-graph)
6442 ("r-xml" ,r-xml)))
5713bbf1 6443 (home-page "https://bioconductor.org/packages/GSEABase")
ad740ff8
RW
6444 (synopsis "Gene set enrichment data structures and methods")
6445 (description
6446 "This package provides classes and methods to support @dfn{Gene Set
6447Enrichment Analysis} (GSEA).")
6448 (license license:artistic2.0)))
6449
1a1931f7
RW
6450(define-public r-category ; R package "Category", fetched from Bioconductor
6451 (package
6452 (name "r-category")
d8410d66 6453 (version "2.46.0")
1a1931f7
RW
6454 (source
6455 (origin
6456 (method url-fetch)
6457 (uri (bioconductor-uri "Category" version))
6458 (sha256
6459 (base32
d8410d66 6460 "03wfqa8d1dgwsm327zl2mpkq7dq3mzhq12598qz3ylfhrwplbgx0"))))
1a1931f7
RW
6461 (properties `((upstream-name . "Category"))) ; records the upstream spelling, which differs from NAME
6462 (build-system r-build-system)
6463 (propagated-inputs
6464 `(("r-annotate" ,r-annotate)
6465 ("r-annotationdbi" ,r-annotationdbi)
6466 ("r-biobase" ,r-biobase)
6467 ("r-biocgenerics" ,r-biocgenerics)
6468 ("r-genefilter" ,r-genefilter)
6469 ("r-graph" ,r-graph)
6470 ("r-gseabase" ,r-gseabase)
6471 ("r-matrix" ,r-matrix)
6472 ("r-rbgl" ,r-rbgl)
2404cc42 6473 ("r-dbi" ,r-dbi)))
5713bbf1 6474 (home-page "https://bioconductor.org/packages/Category")
1a1931f7
RW
6475 (synopsis "Category analysis")
6476 (description
6477 "This package provides a collection of tools for performing category
6478analysis.")
6479 (license license:artistic2.0)))
6480
89f40c5e
RW
6481(define-public r-gostats ; R package "GOstats", fetched from Bioconductor
6482 (package
6483 (name "r-gostats")
9da8e275 6484 (version "2.46.0")
89f40c5e
RW
6485 (source
6486 (origin
6487 (method url-fetch)
6488 (uri (bioconductor-uri "GOstats" version))
6489 (sha256
6490 (base32
9da8e275 6491 "1i5mydz5d95w2k28qr9j01hmbnl2id55jq94jvcpcyp1pvinkdq0"))))
89f40c5e
RW
6492 (properties `((upstream-name . "GOstats"))) ; records the upstream spelling, which differs from NAME
6493 (build-system r-build-system)
6494 (propagated-inputs
6495 `(("r-annotate" ,r-annotate)
6496 ("r-annotationdbi" ,r-annotationdbi)
6497 ("r-annotationforge" ,r-annotationforge)
6498 ("r-biobase" ,r-biobase)
6499 ("r-category" ,r-category)
6500 ("r-go-db" ,r-go-db)
6501 ("r-graph" ,r-graph)
eb3f5cc7 6502 ("r-rgraphviz" ,r-rgraphviz)
89f40c5e 6503 ("r-rbgl" ,r-rbgl)))
5713bbf1 6504 (home-page "https://bioconductor.org/packages/GOstats")
89f40c5e
RW
6505 (synopsis "Tools for manipulating GO and microarrays")
6506 (description
6507 "This package provides a set of tools for interacting with GO and
6508microarray data. A variety of basic manipulation tools for graphs, hypothesis
6509testing and other simple calculations.")
6510 (license license:artistic2.0)))
6511
cb99d457
RW
6512(define-public r-shortread ; R package "ShortRead", fetched from Bioconductor
6513 (package
6514 (name "r-shortread")
eabe78fa 6515 (version "1.38.0")
cb99d457
RW
6516 (source
6517 (origin
6518 (method url-fetch)
6519 (uri (bioconductor-uri "ShortRead" version))
6520 (sha256
6521 (base32
eabe78fa 6522 "038z3z7qaw5bpgjzy91sjkybsny6jwjjsrnnq4gdqdw9ss1qy1fb"))))
cb99d457
RW
6523 (properties `((upstream-name . "ShortRead"))) ; records the upstream spelling, which differs from NAME
6524 (build-system r-build-system)
6525 (inputs
6526 `(("zlib" ,zlib))) ; plain input, unlike the propagated R packages below
6527 (propagated-inputs
6528 `(("r-biobase" ,r-biobase)
6529 ("r-biocgenerics" ,r-biocgenerics)
6530 ("r-biocparallel" ,r-biocparallel)
6531 ("r-biostrings" ,r-biostrings)
6532 ("r-genomeinfodb" ,r-genomeinfodb)
6533 ("r-genomicalignments" ,r-genomicalignments)
6534 ("r-genomicranges" ,r-genomicranges)
6535 ("r-hwriter" ,r-hwriter)
6536 ("r-iranges" ,r-iranges)
6537 ("r-lattice" ,r-lattice)
6538 ("r-latticeextra" ,r-latticeextra)
6539 ("r-rsamtools" ,r-rsamtools)
6540 ("r-s4vectors" ,r-s4vectors)
6541 ("r-xvector" ,r-xvector)
6542 ("r-zlibbioc" ,r-zlibbioc)))
5713bbf1 6543 (home-page "https://bioconductor.org/packages/ShortRead")
cb99d457
RW
6544 (synopsis "FASTQ input and manipulation tools")
6545 (description
6546 "This package implements sampling, iteration, and input of FASTQ files.
6547It includes functions for filtering and trimming reads, and for generating a
6548quality assessment report. Data are represented as
6549@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
6550purposes. The package also contains legacy support for early single-end,
6551ungapped alignment formats.")
6552 (license license:artistic2.0)))
6553
7f903d73
RW
6554(define-public r-systempiper ; R package "systemPipeR", fetched from Bioconductor
6555 (package
6556 (name "r-systempiper")
712fe396 6557 (version "1.14.0")
7f903d73
RW
6558 (source
6559 (origin
6560 (method url-fetch)
6561 (uri (bioconductor-uri "systemPipeR" version))
6562 (sha256
6563 (base32
712fe396 6564 "1550pd63mmky0vgkmpni7zf14kqz1741wv63nfaw29kcmhh3m5lm"))))
7f903d73
RW
6565 (properties `((upstream-name . "systemPipeR"))) ; records the upstream spelling, which differs from NAME
6566 (build-system r-build-system)
6567 (propagated-inputs
6568 `(("r-annotate" ,r-annotate)
6569 ("r-batchjobs" ,r-batchjobs)
6570 ("r-biocgenerics" ,r-biocgenerics)
6571 ("r-biostrings" ,r-biostrings)
6572 ("r-deseq2" ,r-deseq2)
6573 ("r-edger" ,r-edger)
6574 ("r-genomicfeatures" ,r-genomicfeatures)
6575 ("r-genomicranges" ,r-genomicranges)
6576 ("r-ggplot2" ,r-ggplot2)
6577 ("r-go-db" ,r-go-db)
6578 ("r-gostats" ,r-gostats)
6579 ("r-limma" ,r-limma)
6580 ("r-pheatmap" ,r-pheatmap)
6581 ("r-rjson" ,r-rjson)
6582 ("r-rsamtools" ,r-rsamtools)
6583 ("r-shortread" ,r-shortread)
6584 ("r-summarizedexperiment" ,r-summarizedexperiment)
6585 ("r-variantannotation" ,r-variantannotation)))
6586 (home-page "https://github.com/tgirke/systemPipeR") ; GitHub page, not a bioconductor.org URL
6587 (synopsis "Next generation sequencing workflow and reporting environment")
6588 (description
6589 "This R package provides tools for building and running automated
6590end-to-end analysis workflows for a wide range of @dfn{next generation
6591sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
6592Important features include a uniform workflow interface across different NGS
6593applications, automated report generation, and support for running both R and
6594command-line software, such as NGS aligners or peak/variant callers, on local
6595computers or compute clusters. Efficient handling of complex sample sets and
6596experimental designs is facilitated by a consistently implemented sample
6597annotation infrastructure.")
6598 (license license:artistic2.0)))
6599
684f29bd
RW
6600(define-public r-grohmm ; R package "groHMM", fetched from Bioconductor
6601 (package
6602 (name "r-grohmm")
9370b8ee 6603 (version "1.14.0")
684f29bd
RW
6604 (source
6605 (origin
6606 (method url-fetch)
6607 (uri (bioconductor-uri "groHMM" version))
6608 (sha256
6609 (base32
9370b8ee 6610 "1kjb14apyly44qdlx2ld6gr69wlazd4mbhs58l35hir12aphgrzp"))))
684f29bd
RW
6611 (properties `((upstream-name . "groHMM"))) ; records the upstream spelling, which differs from NAME
6612 (build-system r-build-system)
6613 (propagated-inputs
6614 `(("r-genomeinfodb" ,r-genomeinfodb)
6615 ("r-genomicalignments" ,r-genomicalignments)
6616 ("r-genomicranges" ,r-genomicranges)
6617 ("r-iranges" ,r-iranges)
aeb64f3c 6618 ("r-mass" ,r-mass)
684f29bd
RW
6619 ("r-rtracklayer" ,r-rtracklayer)
6620 ("r-s4vectors" ,r-s4vectors)))
6621 (home-page "https://github.com/Kraus-Lab/groHMM") ; GitHub page, not a bioconductor.org URL
6622 (synopsis "GRO-seq analysis pipeline")
6623 (description
6624 "This package provides a pipeline for the analysis of GRO-seq data.")
6625 (license license:gpl3+)))
6626
f3cfe451
RW
6627(define-public r-txdb-hsapiens-ucsc-hg19-knowngene ; Bioconductor annotation data package TxDb.Hsapiens.UCSC.hg19.knownGene
6628 (package
6629 (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
6630 (version "3.2.2")
6631 (source (origin
6632 (method url-fetch)
6633 ;; We cannot use bioconductor-uri here because this tarball is
6634 ;; located under "data/annotation/" instead of "bioc/".
5713bbf1 6635 (uri (string-append "https://bioconductor.org/packages/"
f3cfe451
RW
6636 "release/data/annotation/src/contrib"
6637 "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
6638 version ".tar.gz"))
6639 (sha256
6640 (base32
6641 "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
6642 (properties
6643 `((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene"))) ; records the upstream spelling, which differs from NAME
6644 (build-system r-build-system)
6645 ;; As this package provides little more than a very large data file it
6646 ;; doesn't make sense to build substitutes.
6647 (arguments `(#:substitutable? #f))
6648 (propagated-inputs
6649 `(("r-genomicfeatures" ,r-genomicfeatures)))
6650 (home-page
5713bbf1 6651 "https://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
f3cfe451
RW
6652 (synopsis "Annotation package for human genome in TxDb format")
6653 (description
6654 "This package provides an annotation database of Homo sapiens genome
6655data. It is derived from the UCSC hg19 genome and based on the \"knownGene\"
6656track. The database is exposed as a @code{TxDb} object.")
6657 (license license:artistic2.0)))
6658
325c039c
RJ
6659(define-public r-sparql ; R package "SPARQL", fetched from CRAN
6660 (package
6661 (name "r-sparql")
6662 (version "1.16")
6663 (source (origin
6664 (method url-fetch)
6665 (uri (cran-uri "SPARQL" version))
6666 (sha256
6667 (base32
6668 "0gak1q06yyhdmcxb2n3v0h9gr1vqd0viqji52wpw211qp6r6dcrc"))))
6669 (properties `((upstream-name . "SPARQL"))) ; records the upstream spelling, which differs from NAME
6670 (build-system r-build-system)
6671 (propagated-inputs
6672 `(("r-rcurl" ,r-rcurl)
6673 ("r-xml" ,r-xml)))
e9960d8c 6674 (home-page "https://cran.r-project.org/web/packages/SPARQL")
325c039c
RJ
6675 (synopsis "SPARQL client for R")
6676 (description "This package provides an interface to use SPARQL to pose
6677SELECT or UPDATE queries to an end-point.")
6678 ;; The only license indication is found in the DESCRIPTION file,
6679 ;; which states GPL-3. So we cannot assume GPLv3+.
6680 (license license:gpl3)))
6681
a2950fa4
BW
;; VSEARCH: sequence search, clustering and related tools for metagenomics.
;; Built from the GitHub archive with the bundled cityhash copy removed in
;; favour of the packaged cityhash.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "15pbirgzhvflj4pi5n82vybbzjy9mlb0lv5l3qhrmdkfzpbyahw3"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources.  The vsearch source is adjusted
           ;; for this in the patch.
           (delete-file "src/city.h")
           (delete-file "src/citycrc.h")
           (delete-file "src/city.cc")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run autoreconf right after unpacking.  'invoke' returns #t on
         ;; success and raises an error naming the failed command otherwise,
         ;; which is more informative than a bare #f from
         ;; (zero? (system* ...)).
         (add-after 'unpack 'autogen
           (lambda _ (invoke "autoreconf" "-vif"))))))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    ;; autoreconf above needs these at build time.
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    (home-page "https://github.com/torognes/vsearch")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
6732
07837874
RW
;; ParDRe: MPI-based tool for removing duplicate DNA reads.
(define-public pardre
  (package
    (name "pardre")
    ;; The 1.1.5 source changed in place, hence the "-1" appended to the
    ;; version; the download URL still uses plain "1.1.5".
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           "1.1.5" ".tar.gz"))
       (sha256
        (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install by copying the "ParDRe" file into the output's bin
         ;; directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "ParDRe" bin)
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
6773
e4a44a6a
BW
;; Ruby gem "bio-kseq": bindings for the kseq.h FASTA/FASTQ parser.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments
     '(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
6801
9c38b540
PP
;; Ruby gem "bio-locus": tabix-like querying of genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
edb15985 6825
b2bddb07
PP
;; Ruby gem "bio-blastxmlparser": BLAST XML parser usable as a library and
;; as a command line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-blastxmlparser" version))
       (sha256
        (base32
         "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is a build-time (test) tool, so it is a native input, as in the
    ;; other Ruby packages in this file.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
6849
edb15985
PP
;; Ruby gem "bio" (BioRuby): library, shell and utilities for
;; bioinformatics.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "0hdl0789c9n4mprnx5pgd46bfwl8d000rqpamj5h6kkjgspijv49"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests refer to commands by fixed names such as /bin/sh;
         ;; rewrite them to the absolute file names found on PATH in the
         ;; build environment.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; Clauses are applied to each line in the order given, which
             ;; matches the original sequence of separate passes.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; A plain quoted list of two files.  The previous stray comma
             ;; between the file names was an unquote that only worked
             ;; because of the enclosing quasiquote.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
a5002ae7 6892
9fba89e8
RW
;; CRAN package "ACSNMineR": gene set enrichment analysis.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.16.8.25")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32 "0gh604s8qall6zfjlwcg2ilxjvz08dplf9k5g47idhv43scm748l"))))
    (properties '((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (home-page "https://cran.r-project.org/web/packages/ACSNMineR")
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (license license:gpl2+)))
6918
d29b25c4
RW
;; Bioconductor package "BiocGenerics": shared S4 generic functions.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32 "19qxhy2cd3pykkhzbb5q3crgaxf65cpzf2mkfsz16gqhi8flj72p"))))
    (properties '((upstream-name . "BiocGenerics")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/BiocGenerics")
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (license license:artistic2.0)))
6938
eb24341f
RJ
;; Bioconductor package "BiocInstaller": installer/updater for R packages.
(define-public r-biocinstaller
  (package
    (name "r-biocinstaller")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocInstaller" version))
       (sha256
        (base32 "1xg1gi1hf5vflp71ji21gnmr4kcjpx8a6c47cllpc7yqnjv5nfg0"))))
    (properties '((upstream-name . "BiocInstaller")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/BiocInstaller")
    (synopsis "Install Bioconductor packages")
    (description "This package is used to install and update R packages from
Bioconductor, CRAN, and Github.")
    (license license:artistic2.0)))
6957
207ce8fb
RJ
;; Bioconductor package "biocViews": categorizes packages by keyword.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.48.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32 "1rxvwikqivsgxjjcazlszy8xgz346lfh5rw4llxw6fz38fjgb0k5"))))
    (properties '((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (home-page "https://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
6984
2abfc5b8
RJ
;; CRAN package "bookdown": output formats for books written in R Markdown.
(define-public r-bookdown
  (package
    (name "r-bookdown")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "bookdown" version))
       (sha256
        (base32 "1b3fw1f41zph5yw3kynb47aijq53vhaa6mnnvxly72zamyzdf95q"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-htmltools" ,r-htmltools)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-tinytex" ,r-tinytex)
       ("r-yaml" ,r-yaml)
       ("r-xfun" ,r-xfun)))
    (home-page "https://github.com/rstudio/bookdown")
    (synopsis "Authoring books and technical documents with R markdown")
    (description "This package provides output formats and utilities for
authoring books and technical documents with R Markdown.")
    (license license:gpl3)))
2abfc5b8 7008
99df12cd
RJ
;; Bioconductor package "BiocStyle": formatting styles for PDF and HTML
;; documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32 "17m901ylz00w1a3nq5f910v55zixm1nr6rb3qrsbhqd94qzr0l2p"))))
    (properties '((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
7033
4644644a
RJ
;; Bioconductor package "BiocCheck": Bioconductor-specific package checks.
;; Besides the R library, the package's scripts are linked into bin/.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocCheck" version))
       (sha256
        (base32
         "1srp1g809f1nn0fyqknr7r6dq89bw1xpjvmygr7cw6ffknbc671s"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the file names are appended below, which avoids "//"
                    ;; in the symlink targets.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script from the installed R library into bin/.
               (for-each
                (lambda (script)
                  (symlink (string-append script-dir "/" script)
                           (string-append dest-dir "/" script)))
                '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-optparse" ,r-optparse)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7079
c79ad57a
RJ
;; CRAN package "optparse": command line option parser for Rscript.
(define-public r-optparse
  (package
    (name "r-optparse")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "optparse" version))
       (sha256
        (base32 "1d7v5gl45x4amsfmzn5zyyffyqlc7a82h01szlnda22viyxids0h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-getopt" ,r-getopt)))
    (home-page "https://github.com/trevorld/optparse")
    (synopsis "Command line option parser")
    (description
     "This package provides a command line parser inspired by Python's
@code{optparse} library to be used with Rscript to write shebang scripts
that accept short and long options.")
    (license license:gpl2+)))
7102
247d498a
RJ
;; Bioconductor package "DNAcopy": circular binary segmentation of DNA copy
;; number data.
(define-public r-dnacopy
  (package
    (name "r-dnacopy")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DNAcopy" version))
       (sha256
        (base32 "03hfhmmc5y60r2gcgm367w2fr7qj115l74m9bp3h9qpn5yci0d8n"))))
    (properties '((upstream-name . "DNAcopy")))
    (build-system r-build-system)
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/DNAcopy")
    (synopsis "Implementation of a circular binary segmentation algorithm")
    (description "This package implements the circular binary segmentation (CBS)
algorithm to segment DNA copy number data and identify genomic regions with
abnormal copy number.")
    (license license:gpl2+)))
7124
7485129e
RW
;; Bioconductor package "S4Vectors": S4 vector and list classes.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.18.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "02bps2rpjqx2npwxq3x62ncwi9ggr165cwi56h6hld28bw2gddy8"))))
    (properties '((upstream-name . "S4Vectors")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
7151
274da826
RW
;; CRAN package "seqinr": retrieval and analysis of biological sequences.
(define-public r-seqinr
  (package
    (name "r-seqinr")
    (version "3.4-5")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "seqinr" version))
       (sha256
        (base32 "17zv0n5cji17izwmwg0jcbxbjl3w5rls91w15svcnlpxjms38ahn"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ade4" ,r-ade4)
       ("r-segmented" ,r-segmented)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqinr.r-forge.r-project.org/")
    (synopsis "Biological sequences retrieval and analysis")
    (description
     "This package provides tools for exploratory data analysis and data
visualization of biological sequence (DNA and protein) data. It also includes
utilities for sequence data management under the ACNUC system.")
    (license license:gpl2+)))
7176
78addcb0
RW
;; Bioconductor package "IRanges": infrastructure for intervals on sequences.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.14.11")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "0wz63hysspyjihqadg91dbvllc5a61zzjrsz0b9498lihqc6m1la"))))
    (properties '((upstream-name . "IRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
7205
ffef27f3
RJ
;; Bioconductor annotation package "GenomeInfoDbData": look-up tables used
;; by GenomeInfoDb.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used here.
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32 "0hipipvyvrh75n68hsjg35sxbcfzrghzxv547vnkk2f8ya99g01r"))))
    (properties '((upstream-name . "GenomeInfoDbData")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
7228
bf7764b7
RW
;; Bioconductor package "GenomeInfoDb": utilities for chromosome identifiers.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "0yhnqhaydmmq7ihmhj3rbal4afq5p993l2qqrd0n5wmbyg7glg2d"))))
    (properties '((upstream-name . "GenomeInfoDb")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
7256
744004a3
RJ
;; Bioconductor package "edgeR": differential expression analysis of
;; count-based expression data.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.22.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "edgeR" version))
       (sha256
        (base32
         "0w3jv29n0kkaiig8dbbdqy2dkng8xfaihch82mj9ci5hphrx3nng"))))
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The fourth sentence previously read "It be applied ..."; the missing
    ;; verb has been restored.
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
7284
b669d9c4
RJ
;; Bioconductor package "VariantAnnotation": annotation of genetic variants.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "1r55ki951dj81qvy73knfcy69ik5vzkd56wnk3f6vvf9vngqb8jr"))))
    (properties '((upstream-name . "VariantAnnotation")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
7322
7d4224d7
RJ
;; Bioconductor package "limma": linear models for microarray and RNA-seq
;; data.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.36.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "0iiifszr6hcqih6kszdsbkx3gacfg3d7v8hdx0lbjqnjqgqz7pwk"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
7341
0e7d5560
RW
;; Bioconductor package "XVector": S4 classes for externally stored
;; sequences.  Built against the system zlib instead of zlibbioc.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "XVector" version))
       (sha256
        (base32
         "1zjlhh9lsyhg0js1858csyw2389kbrzdqnqnha833wazkwxilp3f"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Drop the references to the zlibbioc package from the package
         ;; metadata; the "zlib" input below is used instead.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    ;; Spelling fixed: was "manpulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7377
e2cd1d0f
RW
;; Bioconductor package "GenomicRanges": containers for genomic intervals.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.32.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicRanges" version))
       (sha256
        (base32
         "0p58yk2i5gqvjlkx548mnrr49wvs0xfcl06l9rqj2hi6hkkbvnp3"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The opening sentence used to start with "This package provides tools
    ;; to ..." while keeping the predicate "is playing a central role", which
    ;; did not parse; it now has a proper subject.
    (description
     "The ability to efficiently represent and manipulate genomic annotations
and alignments is playing a central role when it comes to analyzing
high-throughput sequencing data (a.k.a. NGS data). The GenomicRanges package
defines general purpose containers for storing and manipulating genomic
intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7406
555e3399
RW
;; Bioconductor package "Biobase": base functions for Bioconductor.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "1iwds2a5ir29k19dbpynlc7nn836cw2gamchhgpi2jf2xar9m9jz"))))
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
7428
8b7bce74
RW
;; Bioconductor package "AnnotationDbi": annotation database interface.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.42.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "0afkbzli08vq02r2pr9phrz3rxd6ilp1w7yw8y99nbjiz14f8b1c"))))
    (properties '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7455
c465fa72
RW
;; Bioconductor package "biomaRt": interface to BioMart databases.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.36.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biomaRt" version))
       (sha256
        (base32
         "0b70s350ffc74v3xz5c3jpazr9zxdb7gjmjfj7aghlsrizrspill"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-httr" ,r-httr)
       ("r-progress" ,r-progress)
       ("r-rcurl" ,r-rcurl)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second; the
    ;; two arguments were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7488
e91d362e
RW
;; Bioconductor "BiocParallel": parallel evaluation helpers for Bioconductor
;; objects.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.14.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "1llb5a62hn4yxpdgqdh2l7i5zd06mjkk8hagsna69cq65wv6iifm"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    ;; Two licenses are listed for this package.
    (license (list license:gpl2+ license:gpl3+))))
7513
bf159353
RW
;; Bioconductor "Biostrings": string containers and matching algorithms for
;; biological sequences.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "118b490jk87ydigm6ln25ms4kskzkw0akmh77clzznhzpqnxsi6j"))))
    ;; The upstream name is capitalized, unlike the Guix package name.
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
7539
f8d74f70
RW
;; Bioconductor "Rsamtools": R interface to samtools, bcftools and tabix.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.32.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "1hpjr22h33pf4fgv0sj83rqzv6l5l7s6fpmmqvchh45ikks1mnhq"))))
    ;; The upstream name is capitalized, unlike the Guix package name.
    (properties '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the references to "zlibbioc" from the package metadata
         ;; right after unpacking; zlib itself is supplied as an input
         ;; below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
7583
71e34e6b
RJ
;; Bioconductor "DelayedArray": delayed operations on array-like objects.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.6.5")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "10b03zrnvz5isfh4z55hasya2m71lrfx10l5lm2sdmqs0gwkanrd"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
7614
6e76dda2
RW
;; Bioconductor "SummarizedExperiment": container for assays indexed by
;; genomic ranges and samples.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "0v3zxl9cqsv79ag5cnvzlhvgaz5cr8f4rn7flmwnwpqd508cznl1"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
7645
d8a828af
RW
;; Bioconductor "GenomicAlignments": containers for short genomic
;; alignments.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "00pi2cnkkbj2023fg2x2cmglkdalwzy1vr3glsikwz7ix9yylcaw"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
7678
317755ff
RW
;; Bioconductor "rtracklayer": interface to genome browsers and annotation
;; track formats.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.40.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "1wxxxlyps19dw3i0pw4mlm3kinnswsc35rgvlnbwvpnpjbca6w4l"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the references to "zlibbioc" from the package metadata
         ;; right after unpacking; zlib itself is supplied as an input
         ;; below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
7725
2fd7c049
RW
;; Bioconductor "GenomicFeatures": tools for transcript-centric annotations.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.32.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "0kfyyg1ib8fkq2hxraal10z4bx3rg8figdskw4yhn1mbh6l42q5f"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
7766
fb25d880
RW
;; Bioconductor "GO.db": annotation maps for the Gene Ontology.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; The URL is spelled out by hand; it points into the
       ;; "data/annotation" area of the Bioconductor release tree.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "02d1mn1al3q7qvhx1ylrr3ar4w4iw0qyi5d89v2336rzwk9maq35"))))
    ;; The upstream name differs from the Guix package name.
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
7790
d547ce5e
RW
;; Bioconductor "topGO": enrichment analysis for Gene Ontology terms.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "05yxnkid8bgw9lkm90if9fg63djhgvbailfa3qsfqa5c0zjmixw1"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
7823
c63cef66
RW
;; Bioconductor "BSgenome": shared infrastructure for Biostrings-based
;; genome data packages.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "1rk2piqq5dppkd51ln3r872d7ng3rvq98071mnd0xdv2xwnyn5g8"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
7853
aa3eeeb5
RJ
;; Bioconductor annotation data: Homo sapiens genome sequences (hs37d5).
(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
  (package
    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
    (version "0.99.1")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.1000genomes.hs37d5_"
                           version ".tar.gz"))
       (sha256
        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
    (properties
     '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens from
1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
    (license license:artistic2.0)))
7884
c43a011d
RW
;; Bioconductor "impute": imputation of missing microarray data.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "1d3cpfaqlq2gnb3hsc2yhxwkrnbd7m6ifif32yp0ya0jr5brl4hr"))))
    (build-system r-build-system)
    ;; NOTE(review): gfortran is declared as a regular input here; confirm
    ;; whether it should be a native input instead.
    (inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
7904
03ea5a35
RW
;; Bioconductor "seqPattern": visualisation of oligonucleotide patterns and
;; motif occurrences.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0dw0yldfcf0ibvpqxlpx1ijnjf9lma47w9w22siszzhw09i0wp3w"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
7931
cb933df6
RW
;; Bioconductor "genomation": summary, annotation and visualization of
;; genomic intervals.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1vdmdyrq0n7pf8cvy2950v7hrcrcbd9zl4fg7dcmyly3iiwdyirp"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
7977
64efa307
RW
;; Bioconductor experiment data "genomationData": sample data sets for use
;; with the genomation package.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/experiment/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "release/data/experiment/src/contrib/"
                                  "genomationData_" version ".tar.gz"))
              (sha256
               (base32
                "0h7g5x3kyb50qlblz5hc85lfm6n6f5nb68i146way3ggs04sqvla"))))
    ;; Record the mixed-case upstream name, as the sibling packages whose
    ;; upstream name differs from the Guix package name do.
    (properties
     `((upstream-name . "genomationData")))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8005
486da491
RW
;; Bioconductor annotation data "org.Hs.eg.db": genome-wide annotation for
;; Human.
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Hs.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1v6wa5613cjq59xd7x1qz8lr9nb2abm9abl2cci1khrnrlpla927"))))
    (properties '((upstream-name . "org.Hs.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Hs.eg.db/")
    (synopsis "Genome wide annotation for Human")
    (description
     "This package contains genome-wide annotations for Human, primarily based
on mapping using Entrez Gene identifiers.")
    (license license:artistic2.0)))
8031
fefedf98
RW
;; Bioconductor annotation data "org.Ce.eg.db": genome-wide annotation for
;; Worm.
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Ce.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "02ggchixlmzywhsbr0h2ms4dravv7m5964cjxqcjxqs16vjwlbk9"))))
    (properties '((upstream-name . "org.Ce.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Ce.eg.db/")
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (license license:artistic2.0)))
8057
16c53a1e
RW
;; Bioconductor annotation data "org.Dm.eg.db": genome-wide annotation for
;; Fly.
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Dm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "033qak1d3wwz17va0bh8z8p8arx0aw2va6gm1qfwsvdkj9cd9d7d"))))
    (properties '((upstream-name . "org.Dm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Dm.eg.db/")
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (license license:artistic2.0)))
8083
e761beb9
RW
;; Bioconductor annotation data "org.Mm.eg.db": genome-wide annotation for
;; Mouse.
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.5.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Mm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "11q21p3ki4bn4hb3aix0g775l45l66jmas6m94nfhqqnpjhv4d6g"))))
    (properties '((upstream-name . "org.Mm.eg.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://www.bioconductor.org/packages/org.Mm.eg.db/")
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (license license:artistic2.0)))
8109
936e7d67
RW
;; Bioconductor "seqLogo": sequence logos for DNA sequence motifs.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.46.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "16xvqcdknix9vjm8mrixi6nyfsr45jm844jh1x90m8044lwrsic1"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties
     '((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8130
c90a4baf
RW
;; Bioconductor annotation data: Homo sapiens genome sequences (UCSC hg19).
(define-public r-bsgenome-hsapiens-ucsc-hg19
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg19")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Hsapiens.UCSC.hg19_"
                           version ".tar.gz"))
       (sha256
        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
    (properties '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
    (synopsis "Full genome sequences for Homo sapiens")
    (description
     "This package provides full genome sequences for Homo sapiens as provided
by UCSC (hg19, February 2009) and stored in Biostrings objects.")
    (license license:artistic2.0)))
8161
a3e90287
RW
;; Bioconductor annotation data: Mus musculus genome sequences (UCSC mm9).
(define-public r-bsgenome-mmusculus-ucsc-mm9
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm9")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm9_"
                           version ".tar.gz"))
       (sha256
        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus musculus (Mouse) as
provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
    (license license:artistic2.0)))
8192
4714d521
RW
;; Bioconductor annotation data: Mus musculus genome sequences (UCSC mm10).
(define-public r-bsgenome-mmusculus-ucsc-mm10
  (package
    (name "r-bsgenome-mmusculus-ucsc-mm10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Mmusculus.UCSC.mm10_"
                           version ".tar.gz"))
       (sha256
        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
    (synopsis "Full genome sequences for Mouse")
    (description
     "This package provides full genome sequences for Mus
musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
in Biostrings objects.")
    (license license:artistic2.0)))
8224
c5173d74
RJ
;; Bioconductor annotation data: TxDb object for Mouse (UCSC mm10,
;; knownGene track).
(define-public r-txdb-mmusculus-ucsc-mm10-knowngene
  (package
    (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "TxDb.Mmusculus.UCSC.mm10.knownGene_"
                           version ".tar.gz"))
       (sha256
        (base32 "08gava9wsvpcqz51k2sni3pj03n5155v32d9riqbf305nbirqbkb"))))
    (properties '((upstream-name . "TxDb.Mmusculus.UCSC.mm10.knownGene")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so building
    ;; substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page
     "https://bioconductor.org/packages/TxDb.Mmusculus.UCSC.mm10.knownGene/")
    (synopsis "Annotation package for TxDb knownGene object(s) for Mouse")
    (description
     "This package loads a TxDb object, which is an R interface to
prefabricated databases contained in this package. This package provides
the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
based on the knownGene track.")
    (license license:artistic2.0)))
8259
943bd627
RW
(define-public r-bsgenome-celegans-ucsc-ce6
  (package
    (name "r-bsgenome-celegans-ucsc-ce6")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used to build the URL.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce6_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
    (properties
     `((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
8291
fc47c7d6
RW
(define-public r-bsgenome-celegans-ucsc-ce10
  (package
    (name "r-bsgenome-celegans-ucsc-ce10")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used to build the URL.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Celegans.UCSC.ce10_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1zaym97jk4npxk14ifvwz2rvhm4zx9xgs33r9vvx9rlynp0gydrk"))))
    (properties
     `((upstream-name . "BSgenome.Celegans.UCSC.ce10")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce10/")
    (synopsis "Full genome sequences for Worm")
    (description
     "This package provides full genome sequences for Caenorhabditis
elegans (Worm) as provided by UCSC (ce10, Oct 2010) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
8323
6dc60998
RW
(define-public r-bsgenome-dmelanogaster-ucsc-dm3
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used to build the URL.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm3_"
                           version ".tar.gz"))
       (sha256
        (base32
         "19bm3lkhhkag3gnwp419211fh0cnr0x6fa0r1lr0ycwrikxdxsv8"))))
    (properties
     `((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it is not worthwhile.
    (arguments `(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects.")
    (license license:artistic2.0)))
8355
ae2462f7
RW
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32
                "0mxhyidkyd2zqahdbg69y20r550z78lvr1r3pbjymnwfg4hcfq1p"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8382
a5002ae7
AE
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.42-8")
    (source
     (origin
       (method url-fetch)
       ;; Use the shared CRAN URI helper, as the other CRAN packages in
       ;; this file do, instead of a hand-built mirror URL.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "1l528dwvfpdlr05imrrm4rq32axp6hld9nqm6mm43kn5n7z2f5k6"))))
    (build-system r-build-system)
    (home-page "https://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
d1e32822 8407
9e3ba31c
RJ
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32
         "1rwr0mci8a712q0isavi4jmhm94gwivc4nr8j7r4kw05flp4g7gz"))))
    (properties `((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8426
e619a5c2
RW
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              ;; Fetched from the project's own site.
              (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8449
52765a63
RW
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.12.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "16ywnb8cmr2xabd1i21b92rfziw7xfbv25yv16ipw617p41wa39z"))))
    ;; Upstream capitalizes the package name.
    (properties `((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8476
fe02c4c9
RW
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.12.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "141q3p4lzwiqk1mfxi8q1q84axjl0gyiqg59xd3sp4viny4jqmgv"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package allows to efficiently obtain count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8506
89984be4
RW
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32
         "0vmn7a0rm2ban0kaxrf5danhss2r4hfhnwh5889fjcgqy300fdd5"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-testthat" ,r-testthat)
       ;; knitr checks that "pandoc-citeproc" is in the PATH while the
       ;; vignettes are being built.
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-bsgenome-mmusculus-ucsc-mm9" ,r-bsgenome-mmusculus-ucsc-mm9)
       ("r-bsgenome-celegans-ucsc-ce10" ,r-bsgenome-celegans-ucsc-ce10)
       ("r-bsgenome-dmelanogaster-ucsc-dm3" ,r-bsgenome-dmelanogaster-ucsc-dm3)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-knitr" ,r-knitr)
       ("r-motifrg" ,r-motifrg)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-org-ce-eg-db" ,r-org-ce-eg-db)
       ("r-org-dm-eg-db" ,r-org-dm-eg-db)
       ("r-org-mm-eg-db" ,r-org-mm-eg-db)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("r-topgo" ,r-topgo)))
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (license license:artistic2.0)))
89984be4 8562
50937297
RW
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.0.5")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0igz7jpcf7cm9800zcag6p3gd1i649figrhbdba6cjkm8f4gfspr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Wrap the installed executable so that it runs with the Guile
         ;; site directories of guile-json and guile-redis on its load
         ;; paths and with the build-time value of R_LIBS_SITE.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix     (assoc-ref outputs "out"))
                    (json-dir   (assoc-ref inputs "guile-json"))
                    (redis-dir  (assoc-ref inputs "guile-redis"))
                    (guile-path (string-append
                                 json-dir "/share/guile/site/2.2:"
                                 redis-dir "/share/guile/site/2.2")))
               (wrap-program (string-append prefix "/bin/rcas-web")
                 `("GUILE_LOAD_PATH" ":" = (,guile-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,guile-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile-json)
       ("guile-redis" ,guile2.2-redis)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8606
7500e42b
RJ
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.6.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "1yq7351j42mjxn8fd3c5bdxzb2l5s4lvqhjdvv4rwj4f600n6wj9"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The following two inputs are suggested packages.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8644
d7160529
RW
(define-public r-wgcna
  (package
    (name "r-wgcna")
    (version "1.63")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "WGCNA" version))
              (sha256
               (base32
                "1225dqm68bynkmklnsxdqdd3zqrpzbvqwyly8ibxmk75z33xz309"))))
    ;; Upstream spells the package name in upper case.
    (properties `((upstream-name . "WGCNA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-doparallel" ,r-doparallel)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-fastcluster" ,r-fastcluster)
       ("r-foreach" ,r-foreach)
       ("r-go-db" ,r-go-db)
       ("r-hmisc" ,r-hmisc)
       ("r-impute" ,r-impute)
       ("r-rcpp" ,r-rcpp)
       ("r-robust" ,r-robust)
       ("r-survival" ,r-survival)
       ("r-matrixstats" ,r-matrixstats)
       ("r-preprocesscore" ,r-preprocesscore)))
    (home-page
     "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
    (synopsis "Weighted correlation network analysis")
    (description
     "This package provides functions necessary to perform Weighted
Correlation Network Analysis on high-dimensional data. It includes functions
for rudimentary data cleaning, construction and summarization of correlation
networks, module identification and functions for relating both variables and
modules to sample traits. It also includes a number of utility functions for
data manipulation and visualization.")
    (license license:gpl2+)))
8683
c827f202
RW
(define-public r-chipkernels
  ;; Built from a pinned upstream commit; the version string embeds the
  ;; revision and the first nine characters of the commit hash.
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels.git")
               (commit commit)))
         ;; Name the checkout with the shared helper, as the other git
         ;; checkouts in this file do, so it carries the "-checkout"
         ;; suffix.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8719
2d9fb170
RW
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/ManuSetty/SeqGL/"
                                  "archive/" version ".tar.gz"))
              ;; Store the archive under a name that includes the package
              ;; name, not just the bare version number.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0pnk1p3sci5yipyc8xnb6jbmydpl80fld927xgnbcv104hy8h8yh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8749
bd3be46e
RW
(define-public r-gkmsvm
  (package
    (name "r-gkmsvm")
    (version "0.79.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gkmSVM" version))
              (sha256
               (base32
                "04dakbgfvfalz4rm4fvvybp506dn5fbj5g86ybfhrc6wywjllsz3"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "gkmSVM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernlab" ,r-kernlab)
       ("r-rcpp" ,r-rcpp)
       ("r-rocr" ,r-rocr)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://cran.r-project.org/web/packages/gkmSVM")
    (synopsis "Gapped-kmer support vector machine")
    (description
     "This R package provides tools for training gapped-kmer SVM classifiers
for DNA and protein sequences. This package supports several sequence
kernels, including: gkmSVM, kmer-SVM, mismatch kernel and wildcard kernel.")
    (license license:gpl2+)))
8782
d4af25b5
RJPB
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32
         "1qjc7ah9dzccpvcjrp9k4qnaz13x6kvy1c1xpdj503km6k528lip"))))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8804
69f2b3bd
RJPB
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rhdf5" version))
       (sha256
        (base32
         "15cmmchhk8bnp94gxg0zk9qyzdjx5kv16dzpbnb62mkq7ydmifx6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8831
17cddc17
RW
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32
         "1w8ypfdz4g7vnwfrvnhjcpm8waciqyq2cn883ajdwg4vv7a5mj9a"))))
    ;; Upstream spells the package name in mixed case.
    (properties `((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8855
66e40e00
RW
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                                  (version-major+minor version) ".0/"
                                  "EMBOSS-" version ".tar.gz"))
              (sha256
               (base32
                "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER tests check for the presence of libgd, libpng
             ;; and zlib, but assume that they are all found at the same
             ;; prefix.  Replace the check with unconditional settings.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; At build time there is no connection to the Internet, so
             ;; looking for updates will not work.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; configure.in and Makefile.am were patched above, so the build
         ;; files have to be regenerated.  'invoke' returns #t on success
         ;; and raises an error describing the failed command otherwise.
         (add-after 'disable-update-check 'autogen
           (lambda _ (invoke "autoreconf" "-vif"))))))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8918
1f1b20b8
RW
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; No release archives have been published as yet; the version is
      ;; 2.13.0 all the same.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Edit the Makefile: blank out every match of ".*_cuda" and
           ;; drop the trailing backslash after "bits_test_intersections".
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ target) target))
               #t))
           ;; Install by copying the "bin" directory into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" bindir))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8964
(define-public piranha
  ;; The latest version has no release tarball, and at the time of this
  ;; writing the latest commit is more than a year old.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the "smithlab-cpp" input into the source
           ;; tree under ./src/smithlab_cpp/.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((smithlab (assoc-ref inputs "smithlab-cpp")))
                 (for-each (lambda (source-file)
                             (install-file source-file "./src/smithlab_cpp/"))
                           (find-files smithlab)))
               #t))
           ;; Copy everything found under the local "bin" directory into
           ;; the output's bin directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin")))
                 (for-each (lambda (program)
                             (install-file program bindir))
                           (find-files "bin" ".*")))
               #t)))
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools")
                              "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools")
                              "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
e62ffce5 9031
d1e32822
RW
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              ;; Use the PyPI URI helper rather than a hand-built
              ;; download URL.
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
6b49a37e
RJ
9060
(define-public filevercmp
  ;; Built from a pinned upstream commit; the version string embeds the
  ;; first seven characters of the commit hash.
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                    commit ".tar.gz"))
                (file-name (string-append name "-" version ".tar.gz"))
                (sha256
                 (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 ;; Report success explicitly, as the other custom phases
                 ;; in this file do, rather than returning the unspecified
                 ;; value of install-file.
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
5fb5dffb
RW
9088
;; MultiQC: report aggregator, fetched from PyPI.  A build phase loosens the
;; matplotlib version requirement declared in setup.py.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (sha256
        (base32
         "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; MultiQC 1.5 ‘requires’ a version of python-matplotlib older
               ;; than the one in Guix, but should work fine with 2.2.2.
               ;; See <https://github.com/ewels/MultiQC/issues/725> and
               ;; <https://github.com/ewels/MultiQC/issues/732> for details.
               ;;
               ;; POSIX extended regexps have no lazy ".*?" quantifier, so
               ;; stop at the closing quote with a negated character class
               ;; instead of relying on how "*?" happens to be interpreted.
               (("['\"]matplotlib[^'\"]*['\"]")
                "'matplotlib'"))
             #t)))))
    (home-page "http://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
6193563a
RW
9134
;; Bioconductor "chipseq" package, built with r-build-system.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "09f8dgl5ni75qkf7nvvppwr3irpplv4xb3ks59ld7l8i2mplcrx7"))))
    (build-system r-build-system)
    ;; R libraries propagated to users of this package.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
d407bdb9
RW
9160
;; Bioconductor experiment-data package "CopyhelpeR".  The tarball URL is
;; assembled by hand from the data/experiment path rather than through
;; bioconductor-uri.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://bioconductor.org/packages/release/"
                                  "data/experiment/src/contrib/CopyhelpeR_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    ;; Upstream spells the name with mixed case.
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
3a3bf2f8
RW
9186
;; Bioconductor "CopywriteR" package; depends on r-copyhelper and r-chipseq
;; among others.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "0b7xwq1va2zclb54f07c5ipmmq4iv2hrlph3j93jz5hmyayv50z3"))))
    (properties `((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
dd42a330 9226
bf3fa996
RW
;; Bioconductor "methylKit" package.  zlib is a plain input in addition to
;; the propagated R libraries.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32
         "1hr2czi5ybdf7hdmqsv39d17f3mvmw94wa38bc14zzm9mgy9gfy7"))))
    (properties `((upstream-name . "methylKit")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9272
dd42a330
RJPB
;; Bioconductor "sva" package, built with r-build-system.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "sva" version))
              (sha256
               (base32
                "0a3jqbz0jp1jxrnjkqfpmca840yqcdwxprdl608bzzx2zb4jl52s"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
fb1e528e
RW
9302
;; CRAN "seqminer" package; needs zlib as an input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "6.1")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqminer" version))
              (sha256
               (base32
                "15yhg4vfc7jg1jnqb3371j00pgbmbyc9l1xx63hq1l3p34lazq2l"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Any GPL version is acceptable, hence both entries.
    (license (list license:gpl2+ license:gpl3+))))
1f40e73c
RW
9324
;; rareMETALS2, downloaded from a fixed location on genome.sph.umich.edu
;; and built with r-build-system.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                  "b/b7/RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
0e7d058e
RW
9356
;; CRAN "MALDIquant" package; it declares no inputs of its own.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.18")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "MALDIquant" version))
              (sha256
               (base32
                "18nl214xjsxkcpbg79jkmw0yznwm5szyh2qb84n7ip46mm779ha6"))))
    (properties `((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
ae262174
RW
9382
;; Bioconductor "ProtGenerics" package; it declares no inputs of its own.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "05jbadw2aiwy8vi2ia0jxg06cmwhly2cq4dy1ag7kdxf1c5i9ccn"))))
    (properties `((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
71676a1f
RW
9402
;; Bioconductor "mzR" package.  The bundled Boost sources are removed by the
;; origin snippet, and a build phase edits src/Makevars so that the Boost
;; libraries from the "boost" input are linked instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzR" version))
              (sha256
               (base32
                "190m2bq5bqxhljaj4f7vz9wj5h5laaxd8zp5jampnql6xc4zmarg"))
              (modules '((guix build utils)))
              ;; Drop the bundled copy of Boost from the unpacked source.
              (snippet
               '(begin
                  (delete-file-recursively "src/boost")
                  #t))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             (substitute* "src/Makevars"
               ;; Erase references to the deleted ./boost/libs files ...
               (("\\./boost/libs.*") "")
               ;; ... and add an ARCH_LIBS line naming the Boost libraries
               ;; right after the ARCH_OBJS= text.
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nARCH_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(("boost" ,boost) ; replaces the bundled boost sources
       ("netcdf" ,netcdf)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
5c9d1505
RW
9454
;; Bioconductor "affyio" package; takes zlib as an input and propagates
;; r-zlibbioc.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.50.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "0sh5wnnchyfpq5n6rchbqmb27byn7kdzn5rgran6i39c81i5z22n"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
05c7e5fb
RW
9478
;; Bioconductor "affy" package; propagates r-affyio and takes zlib as an
;; input.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "0sxq875sigm21sf3qncrfrwfdz9nqw1vldxn3d3hj6aq64jg1ki6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/affy")
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
7c08afaf
RW
9506
;; Bioconductor "vsn" package, built with r-build-system.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.48.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "vsn" version))
              (sha256
               (base32
                "0k6mah3g3zqbfap31xmvig4fn452a18xwwa5y0mfj5mj8588p57h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
4aa7d592
RW
9540
;; Bioconductor "mzID" package, built with r-build-system.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzID" version))
              (sha256
               (base32
                "060k0xlhg8q802c6zsb4b8ps0ccd9ybyaz0gnsvqkxb786i2vk40"))))
    (properties `((upstream-name . "mzID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
6a67e181
RW
9570
;; Bioconductor "pcaMethods" package, built with r-build-system.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.72.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pcaMethods" version))
              (sha256
               (base32
                "0v99yf8m7ryh6z0r3z0ggpqfnflcq5bn1q1i1cl9b7q4p6b4sa07"))))
    (properties `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
11879284
RW
9600
;; Bioconductor "MSnbase" package; propagates several packages defined
;; earlier in this file (r-affy, r-mzid, r-mzr, r-pcamethods, r-vsn, ...).
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.6.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnbase" version))
              (sha256
               (base32
                "15jhqg02ypmznc0wxksw56yij02csy678vqy531fdv86fsmypwa0"))))
    (properties `((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
66da3a9f
RW
9643
;; Bioconductor "MSnID" package; propagates r-msnbase, r-mzid and r-mzr
;; among others.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnID" version))
              (sha256
               (base32
                "172q5chi44104iz4y0g42wrimfp7hlhrfa8vzybx6m0ccrkkhl17"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimize filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
5ded35d8 9682
;; CRAN "Seurat" package, built with r-build-system; all dependencies are
;; propagated R libraries.
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "2.3.4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "Seurat" version))
              (sha256
               (base32
                "0l8bv4i9nzz26mirnva10mq6pimibj24vk7vpvfypgn7xk4942hd"))))
    (properties `((upstream-name . "Seurat")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-dosnow" ,r-dosnow)
       ("r-dplyr" ,r-dplyr)
       ("r-dtw" ,r-dtw)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-foreach" ,r-foreach)
       ("r-fpc" ,r-fpc)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggridges" ,r-ggridges)
       ("r-gplots" ,r-gplots)
       ("r-hdf5r" ,r-hdf5r)
       ("r-hmisc" ,r-hmisc)
       ("r-httr" ,r-httr)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-lars" ,r-lars)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-metap" ,r-metap)
       ("r-mixtools" ,r-mixtools)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reshape2" ,r-reshape2)
       ("r-reticulate" ,r-reticulate)
       ("r-rocr" ,r-rocr)
       ("r-rtsne" ,r-rtsne)
       ("r-sdmtools" ,r-sdmtools)
       ("r-tidyr" ,r-tidyr)
       ("r-tsne" ,r-tsne)))
    (home-page "http://www.satijalab.org/seurat")
    ;; The synopsis says what the package is without repeating its name.
    (synopsis "R toolkit for single cell genomics")
    (description
     "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
2441c284 9745
d4b83651
RW
;; Bioconductor "aroma.light" package, built with r-build-system.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "aroma.light" version))
              (sha256
               (base32
                "1dxsiwsrwcq9mj573f9vpdzrhagdqzal328ma8076px4gg6khxkn"))))
    (properties `((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9772
7b465327
RW
;; Bioconductor "DESeq" package, built with r-build-system.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq" version))
              (sha256
               (base32
                "0ykxw8ksif026xy25wx50j2sdsrp156aqkmhcgfjkpgcw699glnm"))))
    (properties `((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "http://www-huber.embl.de/users/anders/DESeq")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9803
296b3c63
RW
;; Bioconductor "EDASeq" package; propagates r-aroma-light and r-deseq
;; among others.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.14.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "EDASeq" version))
              (sha256
               (base32
                "0970w9d5ddqw1qxqqafdidkxh6hmcv9j5djwgnpz3fgl05kmysg8"))))
    (properties `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9841
5f96f30f
RW
;; Bioconductor "interactiveDisplayBase" package; propagates r-shiny and
;; r-biocgenerics.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "interactiveDisplayBase" version))
              (sha256
               (base32
                "05w58z3i9vkma4jd6rhjaxls4yiq4kwrppgcdq9xrr1pxp99k575"))))
    (properties `((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9865
4a007ffa
RW
;; Bioconductor "AnnotationHub" package; propagates
;; r-interactivedisplaybase among others.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.12.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationHub" version))
              (sha256
               (base32
                "02ls279k1qlch147vw8kwvlhcqyzvi495bgv110m0xnnbpgbln6g"))))
    (properties `((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-curl" ,r-curl)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9900
d4a1e918
RW
;; Bioconductor package "fastseg", built with the R build system.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "fastseg" version))
              (sha256
               (base32
                "1yw6hai6hb8qy7akdm4frfp6h4zy93zb68kdj094sanm7kgqmgik"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9930
3121814e
RW
;; Bioconductor package "KEGGREST", built with the R build system.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.20.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "1ss0xd5570x570v01r6lp64rr1apjrzp0j62520pvm3g8knjhfvs"))))
    ;; The upstream name is upper-case; the package name is lower-case.
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; Reworded: the text used to read "provides a package that provides".
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
9954
e160aa1f
RW
;; Bioconductor package "gage", built with the R build system.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32
         "0j3cqxy97lpf146wkmdfaq9680gicmzxvhp6w5pxq3j7ipiy7262"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page "http://www.biomedcentral.com/1471-2105/10/161")
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    ;; Spelling fix in the last sentence: "funtions" -> "functions".
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
9983
902fb15d
RW
;; Bioconductor package "GenomicFiles", built with the R build system.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicFiles" version))
              (sha256
               (base32
                "0bhsq5czigrjyl9gkb2kpkpl367b3ac5g8s280adkcxggn9g7sxq"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
10016
3af149f5
RW
;; Bioconductor package "ComplexHeatmap", built with the R build system.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "1.18.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ComplexHeatmap" version))
              (sha256
               (base32
                "0qjwz1hzpjnc90jiinjkikfnr0shi72q3zfdjjz7pxydy0mglq8n"))))
    ;; The upstream name differs in case from the package name.
    (properties `((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10046
04e2a2e9
RW
;; Bioconductor package "DirichletMultinomial", built with the R build
;; system.  Besides its R dependencies it takes `gsl' as a plain input.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DirichletMultinomial" version))
              (sha256
               (base32
                "0vcyp81b90in4ls5nbadc66cw2g9aydr94aqifq5j4b7diq74yfs"))))
    ;; The upstream name differs in case from the package name.
    (properties `((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10075
aa30ab5a
RW
;; Bioconductor package "ensembldb", built with the R build system.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.4.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ensembldb" version))
              (sha256
               (base32
                "1l2b4cxiycv05mz4z4f3dhx57r9ksha02psc114h30ldm5rxz8w6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotation for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data is
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieve all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
allowing to retrieve annotations for specific entries like genes encoded on a
chromosome region or transcript models of lincRNA genes.")
    ;; No license version is specified.
    (license license:lgpl3+)))
10118
2f6f61fd
RW
;; Bioconductor package "OrganismDbi", built with the R build system.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32
         "0hb9ni41bjfy5s5ryw2qmqs2sx3i7j47w1g0l8g1pvn7ppnxb6cv"))))
    ;; The upstream name differs in case from the package name.
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; Grammar fix: "a select methods" -> "a select method".
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10150
7af1f403
RW
;; Bioconductor package "biovizBase", built with the R build system.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.28.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biovizBase" version))
              (sha256
               (base32
                "0wc45j3hfn01i44bkkxjj3n8b8xzbkkcdv35mrkzb1f9yprkf8gq"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10192
60faf945
RW
;; Bioconductor package "ggbio", built with the R build system.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.28.5")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ggbio" version))
              (sha256
               (base32
                "19s2v40fycwf44fl3lm791y635xzw67b30sq2g0qq4a6phjik42d"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10245
96a392a0
RW
;; CRAN package "gProfileR" (note `cran-uri', not `bioconductor-uri'),
;; built with the R build system.
(define-public r-gprofiler
  (package
    (name "r-gprofiler")
    (version "0.6.6")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gProfileR" version))
              (sha256
               (base32
                "1n6cj12j102b4x9vhyl4dljp1i0r43p23cnhqbx4als2xfxdlqgi"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "gProfileR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-plyr" ,r-plyr)
       ("r-rcurl" ,r-rcurl)))
    (home-page "https://cran.r-project.org/web/packages/gProfileR/")
    (synopsis "Interface to the g:Profiler toolkit")
    (description
     "This package provides tools for functional enrichment analysis,
gene identifier conversion and mapping homologous genes across related
organisms via the @code{g:Profiler} toolkit.")
    (license license:gpl2+)))
10269
e2b92799
RW
;; Bioconductor package "gQTLBase", built with the R build system.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLBase" version))
              (sha256
               (base32
                "1m3ajpqjhw1nwwsn372r44xfxq0a9a0pzsnrprzdjp6mh52p9b5m"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10304
dd5fc8b7
RW
;; Bioconductor package "snpStats", built with the R build system.  It
;; takes `zlib' as a plain input in addition to its propagated R inputs.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "snpStats" version))
              (sha256
               (base32
                "0iydgfnm053iw860qa1bbh4f6nwzlsf3vhgq92gvl2v4xsz1jbbs"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "snpStats")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10331
421c68e3
RW
;; Bioconductor annotation package "Homo.sapiens", built with the R build
;; system.  The source URL is spelled out by hand (see the comment on `uri').
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; Fetch over HTTPS rather than plain HTTP; the tarball and
              ;; therefore the hash below are unchanged.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "Homo.sapiens_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    ;; The upstream name contains a dot and capital letter, unlike the
    ;; package name.
    (properties
     `((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene" ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10363
15f98b0d
RW
;; Bioconductor package "erma", built with the R build system.
(define-public r-erma
  (package
    (name "r-erma")
    (version "0.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "erma" version))
              (sha256
               (base32
                "1ka68n18yizlyvb8bpwwcl4hqbsasg8hw8jb3vgy3cd4szji87hh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10401
66c39102
RW
;; Bioconductor package "ldblock", built with the R build system.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ldblock" version))
              (sha256
               (base32
                "0c24zvnwsp39d3q0bps13sc441jj9ms2zi34xsb8c392lqmbypvd"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-erma" ,r-erma)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-go-db" ,r-go-db)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10433
794ff347
RW
;; Bioconductor package "gQTLstats", built with the R build system.
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLstats" version))
              (sha256
               (base32
                "19g8qhfgngdc14cw9k4i44cxhs3qva87x56gjzmn25k1yj8qgsp1"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10487
42720006
RW
;; Bioconductor package "Gviz", built with the R build system.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Gviz" version))
       (sha256
        (base32
         "1fhli7ahkl5r43j0hc89ib41mfadj6qyrg36i03ncz8zs6iqwpx4"))))
    ;; The upstream name differs in case from the package name.
    (properties `((upstream-name . "Gviz")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    ;; Grammar fix: plural subject "analyses" now takes "require".
    (description
     "Genomic data analyses require integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10533
2e4ce60e
RW
;; Bioconductor package "gwascat", built with the R build system.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gwascat" version))
              (sha256
               (base32
                "08ba9il4vbjjwlbwmqg4ai6ya1p09js9agn95sw0dhc9gqln42hx"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbio" ,r-ggbio)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlstats" ,r-gqtlstats)
       ("r-graph" ,r-graph)
       ("r-gviz" ,r-gviz)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10573
fbf54406
RW
;; Bioconductor package "Sushi", built with the R build system.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32
         "1m15hmg4k0qhshyn65xfj5hx7xbaf0kxqw70lxisak6pj1w00l41"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "Sushi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10595
30eb4e37
RW
;; Bioconductor package "FitHiC", built with the R build system.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32
         "06w4q836bi1mvkbl1saghv4r5p4hxpjg8cp7kgad13ls450kqmyd"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "FitHiC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10619
c49102ec
RW
;; Bioconductor package "HiTC", built with the R build system.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32
         "0qkk5139f51lwwy1yh7nbkflh5d69prirmhniwam34nlg9rzjm2z"))))
    ;; The upstream name differs in case from the package name.
    (properties
     `((upstream-name . "HiTC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10648
212f6120
RW
;; Bioconductor package "qvalue", built with the R build system.
(define-public r-qvalue
  (package
    (name "r-qvalue")
    (version "2.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "qvalue" version))
       (sha256
        (base32
         "1ndwkj0hh7v4lwylq1v0fkxqs7mfmbcj8kxbdpj1wkvf131z2ns8"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-reshape2" ,r-reshape2)))
    ;; Use the HTTPS form of the GitHub URL instead of plain HTTP.
    (home-page "https://github.com/jdstorey/qvalue")
    (synopsis "Q-value estimation for false discovery rate control")
    (description
     "This package takes a list of p-values resulting from the simultaneous
testing of many hypotheses and estimates their q-values and local @dfn{false
discovery rate} (FDR) values. The q-value of a test measures the proportion
of false positives incurred when that particular test is called significant.
The local FDR measures the posterior probability the null hypothesis is true
given the test's p-value. Various plots are automatically generated, allowing
one to make sensible significance cut-offs. The software can be applied to
problems in genomics, brain imaging, astrophysics, and data mining.")
    ;; Any version of the LGPL.
    (license license:lgpl3+)))
10677
aba3e482
RW
;; Bioconductor package "HDF5Array"; the upstream name is mixed-case, so it
;; is recorded in the package properties.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.8.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "HDF5Array" version))
              (sha256
               (base32
                "1s44zgm9jg82bk4b8k3dh7xw7mxychlfm3grs8516mxnw91zpvy5"))))
    (properties `((upstream-name . "HDF5Array")))
    (build-system r-build-system)
    ;; R packages that must be available at run time.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-rhdf5" ,r-rhdf5)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10703
748e9f16
RW
;; Bioconductor package "Rhdf5lib".  Instead of the bundled HDF5 build, the
;; package is pointed at the static libraries of the "hdf5" input, and the
;; HDF5 sources are unpacked under src/hdf5 to provide the headers.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhdf5lib" version))
              (sha256
               (base32
                "1y59acac6v8hrhv84gghn9ifsni9xxxacaj177rrl4frmkrz4x3c"))))
    (properties `((upstream-name . "Rhdf5lib")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5-dir (assoc-ref inputs "hdf5"))
                   (hdf5-tarball (assoc-ref inputs "hdf5-source")))
               ;; Drop the configure machinery; "Makevars.in" is turned into
               ;; "Makevars" by hand further down.
               (for-each delete-file '("configure" "configure.ac"))
               ;; Do not make other packages link with the proprietary libsz.
               (substitute* "R/zzz.R"
                 (("'%s/libhdf5_cpp.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_cpp.a %s/libhdf5.a %s/libhdf5.a -lz'")
                 (("'%s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5.a %s/libhdf5.a -lz'"))
               (with-directory-excursion "src"
                 ;; Unpack the HDF5 sources and give them a version-free
                 ;; directory name.
                 (invoke "tar" "xvf" hdf5-tarball)
                 (rename-file (string-append "hdf5-" ,(package-version hdf5))
                              "hdf5")
                 ;; Remove timestamp and host system information to make
                 ;; the build reproducible.
                 (substitute* "hdf5/src/libhdf5.settings.in"
                   (("Configured on: @CONFIG_DATE@")
                    "Configured on: Guix")
                   (("Uname information:.*")
                    "Uname information: Linux\n")
                   ;; Remove unnecessary store reference.
                   (("C Compiler:.*")
                    "C Compiler: GCC\n"))
                 (rename-file "Makevars.in" "Makevars")
                 (substitute* "Makevars"
                   ;; Libraries come from the "hdf5" input ...
                   (("HDF5_CXX_LIB=.*")
                    (string-append "HDF5_CXX_LIB="
                                   hdf5-dir "/lib/libhdf5_cpp.a\n"))
                   (("HDF5_LIB=.*")
                    (string-append "HDF5_LIB="
                                   hdf5-dir "/lib/libhdf5.a\n"))
                   ;; ... headers from the sources unpacked above.
                   (("HDF5_CXX_INCLUDE=.*") "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                   (("HDF5_INCLUDE=.*") "HDF5_INCLUDE=./hdf5/src\n")
                   ;; szip is non-free software
                   (("cp \\$\\{SZIP_LIB\\}.*") "")
                   (("\\$\\{USER_LIB_DIR\\}libsz.a") "")))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("hdf5" ,hdf5)))
    (native-inputs
     `(("hdf5-source" ,(package-source hdf5))))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10768
9ec08988
RW
;; Bioconductor package "beachmat": C++ class interface for several matrix
;; types.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "beachmat" version))
              (sha256
               (base32
                "1w90v0jx1zgrfxzx99gdkk0dz2vi25hr51jml1bvq33i64rj7996"))))
    (build-system r-build-system)
    ;; Libraries needed at build time.
    (inputs
     `(("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    ;; R packages that must be available at run time.
    (propagated-inputs
     `(("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10796
6fb1dc17
RW
;; Bioconductor package "SingleCellExperiment"; the upstream name is
;; mixed-case, so it is recorded in the package properties.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "SingleCellExperiment" version))
              (sha256
               (base32
                "0mz3chia250v8v6q8r5cqv5fc4bpcw1hhrfr3p7l5i4xi85scpka"))))
    (properties `((upstream-name . "SingleCellExperiment")))
    (build-system r-build-system)
    ;; R packages that must be available at run time.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10823
658ab21d
RW
;; Bioconductor package "scater": tools for analysing single-cell RNA-seq
;; gene expression data.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.8.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scater" version))
       (sha256
        (base32
         "173lfpas2fqsp4xxsw01wkxd4496c5p8himw9b4r9z4npxkfyv16"))))
    (build-system r-build-system)
    ;; R packages that must be available at run time.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-edger" ,r-edger)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-rjson" ,r-rjson)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-shinydashboard" ,r-shinydashboard)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-tximport" ,r-tximport)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
10867
7f1d9bef
RW
;; Bioconductor package "scran": low-level analyses of single-cell RNA-seq
;; data.
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.8.4")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "scran" version))
              (sha256
               (base32
                "17vq9vb9ak7n4mcqpwnm9x3z91vmr7xnsgj8f45b8dbj7m0v126j"))))
    (build-system r-build-system)
    ;; R packages that must be available at run time.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dt" ,r-dt)
       ("r-dynamictreecut" ,r-dynamictreecut)
       ("r-edger" ,r-edger)
       ("r-fnn" ,r-fnn)
       ("r-ggplot2" ,r-ggplot2)
       ("r-igraph" ,r-igraph)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-shiny" ,r-shiny)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
10910
05f72960
RW
;; Bioconductor package "DelayedMatrixStats"; the upstream name is
;; mixed-case, so it is recorded in the package properties.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DelayedMatrixStats" version))
              (sha256
               (base32
                "1dasghfy8x27zzmd0igag4mc1gxxxbchsl4hpc1050dj3wnw9w3y"))))
    (properties `((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    ;; R packages that must be available at run time.
    (propagated-inputs
     `(("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time is minimized.")
    (license license:expat)))
10942
22a0c659
RW
;; CRAN package "phangorn": phylogenetic analysis in R.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "phangorn" version))
              (sha256
               (base32
                "0xc8k552nxczy19jr0xjjagrzc8x6lafasgk2c099ls8bc1yml1i"))))
    (build-system r-build-system)
    ;; R packages that must be available at run time.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
10970
;; R package built from a pinned commit of the upstream Git repository;
;; the version is derived from a base version of "0", REVISION and COMMIT.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      ;; 'git-version' yields the same "0-2.d746c6f" string that was
      ;; previously assembled by hand, and matches the 'git-file-name'
      ;; helper used for the checkout below.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      ;; R packages that must be available at run time.
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
11000
5ded35d8
RW
;; Private variant of 'htslib' built from a pinned commit of the
;; lomereiter/htslib repository; used as the "htslib" input of 'sambamba'.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lomereiter/htslib.git")
               (commit commit)))
         ;; Same "htslib-VERSION-checkout" name as before, built with the
         ;; standard helper instead of a hand-written 'string-append'.
         (file-name (git-file-name "htslib" version))
         (sha256
          (base32
           "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      ;; Autotools are added on top of the native inputs inherited from
      ;; 'htslib'.
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
11021
;; Sambamba is built from a Git checkout with the LDC toolchain.  The BioD
;; and undeaD sources are copied into the build tree, while htslib and lz4
;; are linked from the package inputs instead of the bundled copies.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.6.7-10-g223fa20")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lomereiter/sambamba.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zb9hrxglxqh13ava9wwri30cvf85hjnbn8ccnr8l60a3k5avczn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             (substitute* "gen_ldc_version_info.py"
               ;; End the replacement with a newline, as the other ".*"
               ;; substitutions in this file do, so that the interpreter
               ;; line stays on a line of its own.
               (("/usr/bin/env.*")
                (string-append (which "python") "\n")))
             (substitute* "Makefile"
               (("\\$\\(shell which ldmd2\\)") (which "ldmd2")))
             #t))
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Copy the two D source trees into the build directory.
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             (copy-recursively (assoc-ref inputs "undead") "undeaD")
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link against the htslib and lz4 inputs and drop the targets
             ;; that build the bundled static libraries.
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" htslib-static lz4-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (install-file "build/sambamba" bin)
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python2-minimal)
       ("biod"
        ,(let ((commit "c778e4f2d8bacea7499283ce39f5577b232732c6"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             ;; Same "biod-<9 hex digits>-checkout" name as before.
             (file-name (git-file-name "biod" (string-take commit 9)))
             (sha256
              (base32
               "1z90562hg47i63gx042wb3ak2vqjg5z7hwgn9bp2pdxfg3nxrw37")))))
       ("undead"
        ,(let ((commit "92803d25c88657e945511f0976a0c79d8da46e89"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/dlang/undeaD.git")
                   (commit commit)))
             ;; Same "undead-<9 hex digits>-checkout" name as before.
             (file-name (git-file-name "undead" (string-take commit 9)))
             (sha256
              (base32
               "0vq6n81vzqvgphjw54lz2isc1j8lcxwjdbrhqz1h5gwrvw9w5138")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    ;; Link to the project page over HTTPS rather than plain HTTP.
    (home-page "https://lomereiter.github.io/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
b8fffd3a
RW
11110
;; Ritornello is built with the plain Makefile (no configure script) and
;; the resulting binary is installed by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/KlugerLab/"
                           "Ritornello/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "02nik86gq9ljjriv6pamwlmqnfky3ads1fpklx6mc3hx6k40pg38"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Include the samtools header through its "samtools/"
             ;; subdirectory.
             (substitute* '("src/SamStream.h"
                            "src/BufferedGenomeReader.h")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin/")))
               (mkdir-p bin)
               (install-file "bin/Ritornello" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
44f6c889
RW
11155
;; Trim Galore! is a single script: nothing is configured or built, the
;; script is patched to use absolute tool paths and then installed.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.4.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore.git")
             (commit version)))
       ;; Same "NAME-VERSION-checkout" name as before, built with the
       ;; standard helper.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0x5892l48c816pf00wmnz5vq0zq6170d3xc8zrxncd4jcz7h1p71"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Look up each input once and reuse the store paths below.
             (let ((cutadapt (assoc-ref inputs "cutadapt"))
                   (gzip (assoc-ref inputs "gzip")))
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '"
                                 cutadapt "/bin/cutadapt'"))
                 (("\\| gzip")
                  (string-append "| " gzip "/bin/gzip"))
                 (("\"gunzip")
                  (string-append "\"" gzip "/bin/gunzip")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               (install-file "trim_galore" bin)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("cutadapt" ,cutadapt)))
    (native-inputs
     `(("unzip" ,unzip)))
    ;; Link to the project page over HTTPS rather than plain HTTP.
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
3420c905
RW
11212
;; GESS is a Python 2 program: nothing is configured or built.  The source
;; tree is copied into site-packages, GESS.py is given a shebang and a
;; PYTHONPATH wrapper, and a symlink to it is placed in bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           ;; The interpreter is located with 'which', so no input lookup
           ;; is needed here; the unused "python" binding was dropped.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    (target (string-append
                             out "/lib/python2.7/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-program (string-append target "GESS.py")
                 `("PYTHONPATH" ":" prefix (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)))
    (home-page "http://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
282b0151
RW
11273
;; PHYLIP is built from its "src" directory with "Makefile.unx"; the
;; files found under "../exe" afterwards are copied to bin/.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                                  "download/phylip-" version ".tar.gz"))
              (sha256
               (base32
                "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               ;; Install every file found in the "exe" directory next to
               ;; "src".
               (for-each (lambda (exe)
                           (install-file exe bin))
                         (find-files "../exe" ".*")))
             #t)))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
aa163424
RW
11310
;; The Integrative Modeling Platform, built with CMake against Python 2.
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://integrativemodeling.org/"
                                  version "/download/imp-" version ".tar.gz"))
              (sha256
               (base32
                "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     `(;; FIXME: Some tests fail because they produce warnings, others fail
       ;; because the PYTHONPATH does not include the modeller's directory.
       #:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    ;; Python 2 modules that must be available at run time.
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license. Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
8befd094
RW
11354
;; TADbit is a Python 2 library; setup.py is patched before the build so
;; that it installs the bash completions into the package output.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/3DGenomes/TADbit/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1cnfqrl4685zar4nnw94j94nhvl2h29jm448nadqi1h05z6fdk4f"))))
    (build-system python-build-system)
    (arguments
     `(;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; setup.py opens these files for writing
               (chmod "_pytadbit/_version.py" #o664)
               (chmod "README.rst" #o664)

               ;; Don't attempt to install the bash completions to
               ;; the home directory.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" out "/etc/bash_completion.d\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
94820951
RW
11412
;; kentUtils: the source snippet strips every directory under "src" that is
;; not on the free list and trims the makefiles accordingly; the build then
;; only needs the separately fetched "samtabix" checkout.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ENCODE-DCC/kentUtils/"
                           "archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "134aja3k1cj32kbk1nnw0q9gxjb2krr15q6sga8qldzvc0585rmm"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest. "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let ((free (list "." ".."
                             "utils" "lib" "inc" "tagStorm"
                             "parasol" "htslib"))
                 (directory? (lambda (file)
                               (eq? 'directory (stat:type (stat file))))))
             ;; Entries of "src" that are not on the free list are removed
             ;; when they are directories; plain files are left alone.
             (for-each (lambda (file)
                         (and (directory? file)
                              (delete-file-recursively file)))
                       (map (cut string-append "src/" <>)
                            (scandir "src"
                                     (lambda (file)
                                       (not (member file free)))))))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-paths
           (lambda _
             ;; Replace the hard-coded /bin/echo with the echo on PATH.
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Place the samtabix checkout in the build tree.
             (copy-recursively (assoc-ref inputs "samtabix")
                               "samtabix")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy the whole "bin" directory into the output.
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/bin")))
               (copy-recursively "bin" destination))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb" ,mariadb)
       ("openssl" ,openssl)))
    (home-page "http://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license. All other sources are removed in a
    ;; snippet. See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
7ceb0a83
RW
11525
;; F-Seq, built from a pinned upstream commit with Ant.  The upstream build
;; has no install target, so the 'install phase is replaced by hand.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; "1.1-" followed by the revision, a dot and the abbreviated commit.
      (version (git-version "1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; The "lib" directory only holds bundled Java library
                ;; archives; delete every file found in it.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin"))
                      (docdir (string-append out "/share/doc/f-seq"))
                      (libdir (string-append out "/lib")))
                 (mkdir-p out)
                 (mkdir-p docdir)
                 ;; Rewrite the launcher script before installing it: use
                 ;; the java found on PATH, take commons-cli from the
                 ;; "java-commons-cli" input, and hard-code REALDIR to the
                 ;; installed bin directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    (string-append (assoc-ref inputs "java-commons-cli")
                                   "/share/java/commons-cli.jar"))
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bindir "\n")))
                 (install-file "README.txt" docdir)
                 (install-file "bin/linux/fseq" bindir)
                 (install-file "build~/fseq.jar" libdir)
                 (copy-recursively "lib" libdir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
96348048
RW
11584
;; Bismark is a set of scripts: nothing is configured or built, the scripts
;; and their documentation are simply copied into the output.
(define-public bismark
  (package
    (name "bismark")
    (version "0.19.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0yb5l36slwg02fp4b1jdlplgljcsxgqfzvzihzdnphd87dghcc84"))
       (snippet
        '(begin
           ;; highcharts.js is non-free software.  The code is available
           ;; under CC-BY-NC or proprietary licenses only.
           (delete-file "bismark_sitrep/highcharts.js")
           #t))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix   (assoc-ref outputs "out"))
                    (bindir   (string-append prefix "/bin"))
                    (sharedir (string-append prefix "/share/bismark"))
                    (docdir   (string-append prefix "/share/doc/bismark"))
                    (manuals  '("Docs/Bismark_User_Guide.html"))
                    (programs '("bismark"
                                "bismark_genome_preparation"
                                "bismark_methylation_extractor"
                                "bismark2bedGraph"
                                "bismark2report"
                                "coverage2cytosine"
                                "deduplicate_bismark"
                                "filter_non_conversion"
                                "bam2nuc"
                                "bismark2summary")))
               ;; The report generator looks for its templates next to the
               ;; script; point it at the share directory instead.
               (substitute* "bismark2report"
                 (("\\$RealBin/bismark_sitrep")
                  (string-append sharedir "/bismark_sitrep")))
               (for-each mkdir-p (list sharedir docdir bindir))
               (for-each (lambda (program) (install-file program bindir))
                         programs)
               (for-each (lambda (manual) (install-file manual docdir))
                         manuals)
               (copy-recursively "Docs/Images"
                                 (string-append docdir "/Images"))
               (copy-recursively "bismark_sitrep"
                                 (string-append sharedir "/bismark_sitrep"))

               ;; Make the installed scripts call gunzip from the "gzip"
               ;; input by absolute file name.
               (substitute* (map (lambda (program)
                                   (string-append bindir "/" program))
                                 programs)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)))
    (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
 and CHH context
@end itemize\n")
    (license license:gpl3+)))
dc8ddbfb
RW
11670
;; PAML is built with plain "make" from the "src" directory.  There is no
;; configure script and no install target, so both phases are replaced.
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; Instead of running a configure script, patch the hard-coded
         ;; shell location and enter the directory holding the Makefile.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         ;; Copy the built tools and the documentation by hand.  The
         ;; working directory is still "src", hence "../doc".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin    (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
d112e5a8
RW
11718
;; Kallisto, built with CMake from the GitHub release archive.
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.43.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pachterlab/"
                           "kallisto/archive/v" version ".tar.gz"))
       ;; The archive is named after the tag only, so give it a
       ;; descriptive file name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "03j3iqhvq7ya3c91gidly3k3jvgm97vjq4scihrlxh315j696r11"))))
    (build-system cmake-build-system)
    ;; The generated build files have no "check" target.
    (arguments
     '(#:tests? #f))
    (inputs
     `(("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
c3b2ab9d
RW
11747
;; GFF/GTF parsing library, built with CMake from the GitHub release archive.
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/Kingsford-Group/"
             "libgff/archive/v" version ".tar.gz"))
       ;; The archive is named after the tag only, so give it a
       ;; descriptive file name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0vc4nxyhlm6g9vvmx5l4lfs5pnvixsv1hiiy4kddf2y3p6jna8ls"))))
    (build-system cmake-build-system)
    ;; No tests are included.
    (arguments
     '(#:tests? #f))
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    ;; The license is identified by the URL of its text.
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
1fe0bde3
RW
11770
;; Suffix-sorting library, built with CMake from the upstream release tag.
(define-public libdivsufsort
  (package
    (name "libdivsufsort")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/y-256/libdivsufsort.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0fgdz9fzihlvjjrxy01md1bv9vh12rkgkwbm90b1hj5xpbaqp7z2"))))
    (build-system cmake-build-system)
    (arguments
     ;; The 64-bit variant of the library is needed for rapmap and
     ;; sailfish.
     '(#:configure-flags '("-DBUILD_DIVSUFSORT64=ON")
       ;; There are no tests.
       #:tests? #f))
    (home-page "https://github.com/y-256/libdivsufsort")
    (synopsis "Lightweight suffix-sorting library")
    (description "libdivsufsort is a software library that implements a
lightweight suffix array construction algorithm. This library provides a
simple and an efficient C API to construct a suffix array and a
Burrows-Wheeler transformed string from a given string over a constant-size
alphabet. The algorithm runs in O(n log n) worst-case time using only 5n+O(1)
bytes of memory space, where n is the length of the string.")
    (license license:expat)))
8d77a085
RW
11799
;; Sailfish bundles or downloads most of its dependencies at build time.
;; The phases below disable those downloads and point the build at the
;; libraries provided as inputs instead.
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "https://github.com/kingsfordgroup/"
                              "sailfish/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1inn60dxiwsz8g9w7kvfhjxj4bwfb0r12dyhpzzhfbig712dkmm0"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         ;; Unpack the RapMap sources and copy them to the place where the
         ;; build expects the downloaded copy.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p "/tmp/rapmap")
               ;; NOTE(review): the "rapmap" input is fetched with git yet
               ;; is unpacked with tar here; presumably the origin's snippet
               ;; causes it to be repacked as a tarball -- confirm.
               ;; 'invoke' signals an error when tar exits with a non-zero
               ;; status, so a failed extraction aborts the build.
               (invoke "tar" "xf" rapmap
                       "-C" "/tmp/rapmap"
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
               (copy-recursively "/tmp/rapmap/include" include)
               #t)))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop the include of the jellyfish config header.
             (substitute* '("src/SailfishIndexer.cpp"
                            "src/SailfishUtils.cpp"
                            "src/SailfishQuantify.cpp"
                            "src/FASTAParser.cpp"
                            "include/PCA.hpp"
                            "include/SailfishUtils.hpp"
                            "include/SailfishIndex.hpp"
                            "include/CollapsedEMOptimizer.hpp"
                            "src/CollapsedEMOptimizer.cpp")
               (("#include \"jellyfish/config.h\"") ""))
             ;; Replace references to the bundled install tree with the
             ;; corresponding files of the jellyfish and libdivsufsort
             ;; inputs.
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (string-append (assoc-ref inputs "jellyfish")
                               "/include/jellyfish-" ,(package-version jellyfish)))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (string-append (assoc-ref inputs "jellyfish")
                               "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so")))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("find_package\\(Jellyfish.*") "")
               ;; Turn the external project declarations into harmless
               ;; "message" calls.
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libsparsehash") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message("))

             ;; Ensure that Eigen headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (getenv "CPLUS_INCLUDE_PATH")
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://www.cs.cmu.edu/~ckingsf/software/sailfish")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
7762646d
RW
11946
;; Private (not exported) build of the COMBINE-lab variant of staden-io_lib;
;; it is used as an input of salmon.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    ;; Running the tests in parallel is not supported.
    (arguments
     '(#:parallel-tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    ;; Perl is needed for the tests.
    (native-inputs
     `(("perl" ,perl)))
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
11987
;; Private (not exported) build of the COMBINE-lab variant of spdlog; it is
;; used as an input of salmon.
(define spdlog-for-salmon
  (package
    (name "spdlog")
    (version "0.14.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/spdlog.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13730429gwlabi432ilpnja3sfvy0nn2719vnhhmii34xcdyc57q"))))
    (build-system cmake-build-system)
    (home-page "https://github.com/COMBINE-lab/spdlog")
    (synopsis "Very fast C++ logging library")
    (description "Spdlog is a very fast header-only C++ logging library with
performance as its primary goal.")
    (license license:expat)))
12007
;; Modified variant of bwa, meant only for Salmon.  In addition to the
;; program it installs a static library and the headers, so that Salmon
;; does not have to build bwa itself.
(define bwa-for-salmon
  (package
    (inherit bwa)
    (name "bwa")
    (version "0.7.12.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/bwa.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name "bwa-for-salmon" version))
       (sha256
        (base32
         "1z2qa64y0c5hky10510x137mnzlhz6k8qf27csw4w9j6qihq95gb"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Install the program, its documentation, the static library
         ;; and every header file found in the source tree.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (bindir  (string-append prefix "/bin"))
                    (libdir  (string-append prefix "/lib"))
                    (docdir  (string-append prefix "/share/doc/bwa"))
                    (mandir  (string-append prefix "/share/man/man1"))
                    (headers (string-append prefix "/include/bwa")))
               (mkdir-p libdir)
               (mkdir-p headers)
               (install-file "bwa" bindir)
               (install-file "README.md" docdir)
               (install-file "bwa.1" mandir)
               (install-file "libbwa.a" libdir)
               (for-each (lambda (header)
                           (install-file header headers))
                         (find-files "." "\\.h$")))
             #t)))))))
12048
;; Salmon bundles or downloads most of its dependencies at build time.  The
;; phases below disable those downloads and point the build at the
;; libraries provided as inputs instead.
(define-public salmon
  (package
    (name "salmon")
    (version "0.9.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/salmon.git")
                    (commit (string-append "v" version))))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1zi1ff4i7y2ykk0vdzysgwzzzv166vg2x77pj1mf4baclavxj87a"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             "-DTBB_LIBRARIES=tbb tbbmalloc"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         ;; Replace the call that obtains the version message with an
         ;; empty string.
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         ;; Copy the RapMap sources to the place where the build expects
         ;; the downloaded copy, then remove the files listed below from
         ;; that copy.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files (string-append rapmap "/src") "\\.(c|cpp)"))
               (copy-recursively (string-append rapmap "/include") include)
               (for-each delete-file '("external/install/include/rapmap/xxhash.h"
                                       "external/install/include/rapmap/FastxParser.hpp"
                                       "external/install/include/rapmap/concurrentqueue.h"
                                       "external/install/include/rapmap/FastxParserThreadUtils.hpp"
                                       "external/install/src/rapmap/FastxParser.cpp"
                                       "external/install/src/rapmap/xxhash.c"))
               #t)))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace references to the bundled install tree with the
             ;; corresponding files of the inputs.
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (string-append (assoc-ref inputs "jellyfish")
                               "/include/jellyfish-" ,(package-version jellyfish)))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (string-append (assoc-ref inputs "jellyfish")
                               "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                (string-append (assoc-ref inputs "libstadenio-for-salmon")
                               "/lib/libstaden-read.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libbwa.a")
                (string-append (assoc-ref inputs "bwa") "/lib/libbwa.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so")))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               (("find_package\\(Jellyfish.*") "")
               ;; Turn the external project declarations into harmless
               ;; "message" calls.
               (("ExternalProject_Add\\(libcereal") "message(")
               (("ExternalProject_Add\\(libbwa") "message(")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libtbb") "message(")
               (("ExternalProject_Add\\(libspdlog") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message(")
               (("ExternalProject_Add\\(libstadenio") "message(")
               (("ExternalProject_Add_Step\\(") "message("))

             ;; Ensure that all headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (getenv "CPLUS_INCLUDE_PATH")
                                    ":"
                                    (assoc-ref inputs "bwa")
                                    "/include/bwa"
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             (setenv "CPATH"
                     (string-append (assoc-ref inputs "bwa")
                                    "/include/bwa"
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bwa" ,bwa-for-salmon)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "salmon-v" version))))
                    (file-name (string-append "rapmap-salmon-v" version "-checkout"))
                    (sha256
                     (base32
                      "1yc12yqsz6f0r8sg1qnk57xg34aqwc9jbqq6gd5ys28xw3plj98p"))))
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("spdlog-for-salmon" ,spdlog-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
152d30c3
RW
12209
;; Python library for .loom files, fetched from PyPI.
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.2")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "loompy" version))
              (sha256
               (base32
                "1drgv8j1hxqzzpnfg272x9djb6j8qr798w1pc2x8ikmfgyd9gh51"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f))                    ; there are no tests
    ;; The Python libraries below are propagated along with this package.
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-typing" ,python-typing)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
5fef15e5 12237
f09d16e5
RW
;; The package is pinned to an older commit: the latest upstream commit
;; cannot be used because it requires Java 9.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      ;; "0-" followed by the revision, a dot and the abbreviated commit.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled jars and pre-built classes
             (delete-file-recursively "forester/java/resources")
             (delete-file-recursively "forester/java/classes")
             (for-each delete-file (find-files "forester/java/" "\\.jar$"))
             ;; Delete bundled applications
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         ;; The sxml modules are needed by the 'fix-dependencies phase.
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; The Java sources and build.xml live in a sub-directory.
           (add-after 'unpack 'chdir
             (lambda _ (chdir "forester/java") #t))
           ;; Rewrite build.xml: parse it, drop the unjar elements, write
           ;; the result to a new file and move that over the original.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (call-with-output-file "build.xml.new"
                 (lambda (out)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     `(;; Remove all unjar tags to avoid repacking classes.
                       (unjar     . ,(lambda _ '()))
                       (*default* . ,(lambda (tag . children)
                                       `(,tag ,@children)))
                       (*text*    . ,(lambda (_ text) text))))
                    out)))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
12313
10645b92
RW
;; Forester 1.005, built from the "-sources.jar" archive.  This is the
;; variant referenced by the BioJava 4.0 packages in this file.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://search.maven.org/remotecontent?"
                                  "filepath=org/biojava/thirdparty/forester/"
                                  version "/forester-" version "-sources.jar"))
              (file-name (string-append name "-" version ".jar"))
              (sha256
               (base32
                "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite "src/build.xml" into a top-level "build.xml" and point
         ;; it at the resource files that are provided as native inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (call-with-output-file "build.xml"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "src/build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `(;; Remove all unjar tags to avoid repacking classes.
                     (unjar     . ,(lambda _ '()))
                     (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                     (*text*    . ,(lambda (_ txt) txt))))
                  port)))
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             ;; The patterns are regular expressions, so the dots of the
             ;; relative paths are escaped to match literally.
             (substitute* "build.xml"
               (("\\.\\./resources/synth_laf/synth_look_and_feel_1\\.xml")
                "synth_look_and_feel_1.xml")
               (("\\.\\./resources/phyloxml_schema/1\\.10/phyloxml\\.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; Replace the call to PdfExporter with a throw, and turn the
             ;; remainder of the original statement into a Java comment.
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               (("\\.getPrintSizeX\\(\\), getOptions\\(\\)\\.getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\)\\.getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _ (delete-file-recursively "src/classes") #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
12408
5fef15e5
RW
;; Core module of BioJava.  The other java-biojava-* packages in this file
;; inherit from this definition.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude
       (list "**/SearchIOTest.java"
             "**/BlastXMLParserTest.java"
             "**/GenbankCookbookTest.java"
             "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource directories into the build tree
         ;; before the 'build and 'check phases run.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
b28c8037
RW
12464
;; The "biojava-phylo" module, built from the same source as
;; java-biojava-core.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged from java-biojava-core.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource directories into the build tree.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
22c09c33
RW
12500
;; The "biojava-alignment" module, built from the same source as
;; java-biojava-core.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged from java-biojava-core.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource directories into the build tree.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
 alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
e44da58c
RW
12544
;; BioJava core at version 4.0.0.  Only the version and the source differ
;; from java-biojava-core; every other field is inherited.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    ;; NAME is repeated here because the FILE-NAME expression below
    ;; refers to it.
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
5528f9e8
RW
12558
;; The "biojava-phylo" module at the version of java-biojava-core-4.0,
;; using forester 1.005.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged (java-biojava-core-4.0 takes them from java-biojava-core).
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource directories into the build tree.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
9ac7db18
RW
12594
;; The "biojava-alignment" module at the version of java-biojava-core-4.0,
;; using forester 1.005.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged (java-biojava-core-4.0 takes them from java-biojava-core).
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource directories into the build tree.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
 alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
1adeef31
RW
12638
;; Drop-seq tools, built with ant from the upstream zip archive after the
;; bundled jars have been removed.  Java dependencies are symlinked into
;; "jar/lib" for the build and into "share/java/lib" of the output.
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Collect the directories of all inputs whose label starts
             ;; with "java-", except for the test-only "java-testng".
             (let ((dirs (filter-map (match-lambda
                                       ((name . dir)
                                        (if (and (string-prefix? "java-" name)
                                                 (not (string=? name "java-testng")))
                                            dir #f)))
                                     inputs)))
               (for-each (lambda (jar)
                           (symlink jar (string-append "jar/lib/" (basename jar))))
                         (append-map (lambda (dir) (find-files dir "\\.jar$"))
                                     dirs)))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin     (string-append out "/bin"))
                    (share   (string-append out "/share/java/"))
                    ;; SHARE already ends in a slash, so none is added here.
                    (lib     (string-append share "lib/"))
                    (scripts (list "BAMTagHistogram"
                                   "BAMTagofTagCounts"
                                   "BaseDistributionAtReadPosition"
                                   "CollapseBarcodesInPlace"
                                   "CollapseTagWithContext"
                                   "ConvertToRefFlat"
                                   "CreateIntervalsFiles"
                                   "DetectBeadSynthesisErrors"
                                   "DigitalExpression"
                                   "Drop-seq_alignment.sh"
                                   "FilterBAM"
                                   "FilterBAMByTag"
                                   "GatherGeneGCLength"
                                   "GatherMolecularBarcodeDistributionByGene"
                                   "GatherReadQualityMetrics"
                                   "PolyATrimmer"
                                   "ReduceGTF"
                                   "SelectCellsByNumTranscripts"
                                   "SingleCellRnaSeqMetricsCollector"
                                   "TagBamWithReadSequenceExtended"
                                   "TagReadWithGeneExon"
                                   "TagReadWithInterval"
                                   "TrimStartingSequence"
                                   "ValidateReference")))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; Make the installed wrapper scripts use the absolute file
               ;; name of "java" and the installed jar directory.
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    (lib   (string-append share "lib/")))
               ;; Re-create each link from "jar/lib" in the output, pointing
               ;; at the same target.
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib (basename jar))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
0bdd5b8a
RW
12779
;; PiGx pipeline for RNA sequencing experiments, built from the upstream
;; release tarball.
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "16gla23rmziimqan7w494q0nr7vfbp42zzkrl9fracmr4k7b1kzr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             ;; Edit the test entries that "Makefile.in" lists.
             (substitute* "Makefile.in"
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("gzip" ,gzip)
       ("snakemake" ,snakemake-4)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ;; R and R packages
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
531afc8a
RW
12847
;; PiGx pipeline for ChIP sequencing experiments, built from the upstream
;; release tarball.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32
         "19a7dclqq0b4kqg3phiz4d4arlwfp34nm3z0rf1gkqdpsy7gghp3"))))
    (build-system gnu-build-system)
    ;; parts of the tests rely on access to the network
    (arguments '(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ;; R and R packages
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ;; Python and Python packages
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake-4)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
fb94174f
RW
12917
;; PiGx pipeline for bisulfite sequencing data, built from the upstream
;; release tarball.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                           "releases/download/v" version
                           "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32
         "0l97wvkq4diq8lcarraj33bby1zzf0w804jwi8mlc5qddp8idwhy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-timezone
           ;; The readr package is picky about timezones.
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point TZDIR at the zoneinfo directory of the "tzdata" input.
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo))
             #t)))))
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ;; R and R packages
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake-4)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
32cbbac1 12976
46264c73
RW
;; PiGx pipeline for single-cell RNA sequencing experiments, built from the
;; upstream release tarball.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                           "releases/download/v" version
                           "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "131zarirv16w8653m0d66jgjnwqfsxqc0hix0rypssz4d83bl51j"))))
    (build-system gnu-build-system)
    (arguments
     ;; Pass the file names of the Picard and Drop-seq jars to configure.
     `(#:configure-flags
       (list (string-append "PICARDJAR="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/picard.jar")
             (string-append "DROPSEQJAR="
                            (assoc-ref %build-inputs "dropseq-tools")
                            "/share/java/dropseq.jar"))))
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("dropseq-tools" ,dropseq-tools)
       ("fastqc" ,fastqc)
       ("java-picard" ,java-picard-2.10.3) ; same as for dropseq
       ("java" ,icedtea-8)
       ;; Python and Python packages
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake-4)
       ("star" ,star)
       ;; R and R packages
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
13047
c1c9cc66
RW
;; Umbrella package that takes the four individual PiGx pipelines as inputs.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                           "releases/download/v" version
                           "/pigx-" version ".tar.gz"))
       (sha256
        (base32
         "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python)
       ;; The individual pipelines
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
13083
32cbbac1
LF
;; CRAN package "diversitree", built with the R build system.
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-10")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "0gh4rcrp0an3jh8915i1fsxlgyfk7njywgbd5ln5r2jhr085kpz7"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs
     `(("fftw" ,fftw)
       ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ;; The label now matches the package name (it used to read
       ;; "r-suplex").
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\". Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))
90930159
RW
13114
;; Splice junction counter, built from a pinned commit of the upstream
;; repository against the "samtools-0.1" package.
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, patch the makefile: use
           ;; the samtools include directory and also link with pthread.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* "makefile"
                 (("-I \\$\\{SAMTOOLS_DIR\\}")
                  (string-append "-I" (assoc-ref inputs "samtools")
                                 "/include/samtools"))
                 (("-lz ") "-lz -lpthread "))
               #t))
           ;; Install the three executables into "bin" of the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (for-each (lambda (tool)
                           (install-file
                            tool
                            (string-append (assoc-ref outputs "out") "/bin")))
                         (list "j_count" "b_count" "sjcount"))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
53131eab
RW
13165
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "080w9066irkbhbyr4nmf19pzkdd2s4v31hpzlajgq2y0drr6zcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (list "CC=gcc"
             ;; Pick the extra make argument from the system being built
             ;; for (the cross-compilation target when there is one).
             (let ((system ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" system)
                 "all")
                ((or (string-prefix? "armhf" system)
                     (string-prefix? "aarch64" system))
                 "arm_neon=1")
                ;; The catch-all clause of 'cond' is spelled 'else'; '_' is
                ;; only a wildcard in 'match' patterns and is not a valid
                ;; test expression here.
                (else "sse2only=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable and its manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (install-file "minimap2" bin)
               (mkdir-p man)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
 reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
 divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
fab43c6b 13224
242519ce
RW
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus.git")
             ;; Release tags carry a "v" prefix.
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
13266
fab43c6b
RW
(define-public r-loomr
  ;; The package is built from a pinned commit; the version string is
  ;; derived from the base version, the revision and that commit.
  (let* ((commit "df0144bd2bbceca6fadef9edc1bbc5ca672d4739")
         (revision "1"))
    (package
      (name "r-loomr")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mojaveazure/loomR.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1b1g4dlmfdyhn56bz1mkh9ymirri43wiz7rjhs7py3y7bdw1s3yr"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-r6" ,r-r6)
         ("r-hdf5r" ,r-hdf5r)
         ("r-iterators" ,r-iterators)
         ("r-itertools" ,r-itertools)
         ("r-matrix" ,r-matrix)))
      (home-page "https://github.com/mojaveazure/loomR")
      (synopsis "R interface for loom files")
      (description "This package provides an R interface to access, create,
and modify loom files. loomR aims to be completely compatible with loompy.")
      (license license:gpl3))))
689aacc1
RW
13295
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         ;; Matches the directory populated by 'copy-gclib-source below.
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place the gclib sources inside the build tree, where GCLDIR
           ;; points.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; 'let*' is required: VERSION is computed from the gclib COMMIT
          ;; and REVISION bound just above it, so that the checkout name
          ;; identifies the pinned gclib commit instead of leaving
          ;; REVISION unused.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib.git")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
079cdd9c
RW
13356
(define-public find-circ
  ;; The most recent release dates from 2015, whereas the license was only
  ;; clarified in 2017; hence the latest commit is packaged.
  (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d")
        (revision "1"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Install each script and wrap it so that the PYTHONPATH seen
           ;; at build time is prefixed to PYTHONPATH when it runs.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                     (python-path (getenv "PYTHONPATH")))
                 (for-each
                  (lambda (script)
                    (install-file script bin)
                    (wrap-program (string-append bin "/" script)
                      (list "PYTHONPATH" ":" 'prefix (list python-path))))
                  '("cmp_bed.py"
                    "find_circ.py"
                    "maxlength.py"
                    "merge_bed.py"
                    "unmapped2anchors.py")))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
c7fe888b
RW
13408
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "1ak7bxms5a0yvf65prppq2g38clkv7c7jnjbnfpkh3xxv7q512jz"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-igraph" ,python-igraph)
       ("python-numba" ,python-numba)
       ("python-joblib" ,python-joblib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-statsmodels" ,python-statsmodels)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-h5py" ,python-h5py)
       ("python-tables" ,python-tables)))
    ;; Use the HTTPS address directly rather than relying on a redirect.
    (home-page "https://github.com/theislab/scanpy")
    ;; A synopsis is not a sentence and must not end with a period.
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
4f9355c3 13444
(define-public gffcompare
  ;; Built from a pinned commit together with a matching gclib checkout.
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the gclib sources to "../gclib", next to the build
           ;; directory.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; This must be 'let*': with a plain 'let', the COMMIT and
          ;; REVISION used to compute VERSION would refer to the enclosing
          ;; gffcompare bindings, and the checkout would be named after the
          ;; gffcompare commit instead of the gclib one.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
a9d496b3
RW
13507
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "intervaltree" version))
       (sha256
        (base32 "02w191m9zxkcjqr1kv2slxvhymwhj3jnsyy3a28b837pi15q19dc"))))
    (build-system python-build-system)
    ;; FIXME: the tests are disabled because an error occurs while they are
    ;; being collected.
    (arguments `(#:tests? #f))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
1f41f01c
RW
13535
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pypairix" version))
       (sha256
        (base32 "0zs92b74s5v4xy2h16s15f3z6l4nnbw8x8zyif7xx5xpafjn0xss"))))
    (build-system python-build-system)
    ;; FIXME: tests are switched off since test.support fails to load:
    ;; ImportError: cannot import name 'support'
    (arguments `(#:tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
80f4db91
RW
13559
(define-public python-pyfaidx
  (package
    (name "python-pyfaidx")
    (version "0.5.4.2")
    (source
     (origin
       (method url-fetch)
       ;; Released on PyPI under the name "pyfaidx".
       (uri (pypi-uri "pyfaidx" version))
       (sha256
        (base32 "0y5zyjksj1rdglj601xd2bbni5abhdh622y3ck76chyzxz9z4rx8"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-setuptools" ,python-setuptools)
       ("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
5bb9e0af
RW
13581
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.7.11")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cooler" version))
       (sha256
        (base32 "08k5nxnxa6qsbk15z5z0q01n28042k87wi4905hh95rzqib15mhx"))))
    (build-system python-build-system)
    ;; Inputs needed only while building.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-numpydoc" ,python-numpydoc)
       ("python-sphinx" ,python-sphinx)))
    ;; Inputs propagated to users of this package.
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
1189c7f1
RW
13618
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn every exact "==" version pin in setup.py into a ">="
         ;; lower bound.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
5bfa7510
RW
13671
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       ;; The PyPI project name is spelled in mixed case.
       (uri (pypi-uri "pyGenomeTracks" version))
       (sha256
        (base32 "1fws6bqsyy9kj3qiabhkqx4wd4i775gsxnhszqd3zg7w67sc1ic5"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-configparser" ,python-configparser)
       ("python-future" ,python-future)
       ("python-hicexplorer" ,python-hicexplorer)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
1dd153b3
RW
13702
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hic2cool" version))
       (sha256
        (base32 "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    ;; No tests are included, so the check phase is disabled.
    (arguments `(#:tests? #f))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))