gnu: webkitgtk: Update to 2.28.2.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
19 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
20 ;;;
21 ;;; This file is part of GNU Guix.
22 ;;;
23 ;;; GNU Guix is free software; you can redistribute it and/or modify it
24 ;;; under the terms of the GNU General Public License as published by
25 ;;; the Free Software Foundation; either version 3 of the License, or (at
26 ;;; your option) any later version.
27 ;;;
28 ;;; GNU Guix is distributed in the hope that it will be useful, but
29 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
30 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 ;;; GNU General Public License for more details.
32 ;;;
33 ;;; You should have received a copy of the GNU General Public License
34 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
35
36 (define-module (gnu packages bioinformatics)
37 #:use-module ((guix licenses) #:prefix license:)
38 #:use-module (guix packages)
39 #:use-module (guix utils)
40 #:use-module (guix download)
41 #:use-module (guix git-download)
42 #:use-module (guix hg-download)
43 #:use-module (guix build-system ant)
44 #:use-module (guix build-system gnu)
45 #:use-module (guix build-system cmake)
46 #:use-module (guix build-system go)
47 #:use-module (guix build-system haskell)
48 #:use-module (guix build-system meson)
49 #:use-module (guix build-system ocaml)
50 #:use-module (guix build-system perl)
51 #:use-module (guix build-system python)
52 #:use-module (guix build-system r)
53 #:use-module (guix build-system ruby)
54 #:use-module (guix build-system scons)
55 #:use-module (guix build-system trivial)
56 #:use-module (gnu packages)
57 #:use-module (gnu packages autotools)
58 #:use-module (gnu packages algebra)
59 #:use-module (gnu packages base)
60 #:use-module (gnu packages bash)
61 #:use-module (gnu packages bison)
62 #:use-module (gnu packages bioconductor)
63 #:use-module (gnu packages boost)
64 #:use-module (gnu packages check)
65 #:use-module (gnu packages code)
66 #:use-module (gnu packages compression)
67 #:use-module (gnu packages cpio)
68 #:use-module (gnu packages cran)
69 #:use-module (gnu packages curl)
70 #:use-module (gnu packages documentation)
71 #:use-module (gnu packages databases)
72 #:use-module (gnu packages datastructures)
73 #:use-module (gnu packages dlang)
74 #:use-module (gnu packages file)
75 #:use-module (gnu packages flex)
76 #:use-module (gnu packages gawk)
77 #:use-module (gnu packages gcc)
78 #:use-module (gnu packages gd)
79 #:use-module (gnu packages golang)
80 #:use-module (gnu packages glib)
81 #:use-module (gnu packages graph)
82 #:use-module (gnu packages groff)
83 #:use-module (gnu packages gtk)
84 #:use-module (gnu packages guile)
85 #:use-module (gnu packages guile-xyz)
86 #:use-module (gnu packages haskell-check)
87 #:use-module (gnu packages haskell-web)
88 #:use-module (gnu packages haskell-xyz)
89 #:use-module (gnu packages image)
90 #:use-module (gnu packages imagemagick)
91 #:use-module (gnu packages java)
92 #:use-module (gnu packages java-compression)
93 #:use-module (gnu packages jemalloc)
94 #:use-module (gnu packages linux)
95 #:use-module (gnu packages lisp-xyz)
96 #:use-module (gnu packages logging)
97 #:use-module (gnu packages machine-learning)
98 #:use-module (gnu packages man)
99 #:use-module (gnu packages maths)
100 #:use-module (gnu packages mpi)
101 #:use-module (gnu packages ncurses)
102 #:use-module (gnu packages ocaml)
103 #:use-module (gnu packages pcre)
104 #:use-module (gnu packages parallel)
105 #:use-module (gnu packages pdf)
106 #:use-module (gnu packages perl)
107 #:use-module (gnu packages perl-check)
108 #:use-module (gnu packages pkg-config)
109 #:use-module (gnu packages popt)
110 #:use-module (gnu packages protobuf)
111 #:use-module (gnu packages python)
112 #:use-module (gnu packages python-compression)
113 #:use-module (gnu packages python-science)
114 #:use-module (gnu packages python-web)
115 #:use-module (gnu packages python-xyz)
116 #:use-module (gnu packages readline)
117 #:use-module (gnu packages ruby)
118 #:use-module (gnu packages serialization)
119 #:use-module (gnu packages shells)
120 #:use-module (gnu packages sphinx)
121 #:use-module (gnu packages statistics)
122 #:use-module (gnu packages swig)
123 #:use-module (gnu packages tbb)
124 #:use-module (gnu packages tex)
125 #:use-module (gnu packages texinfo)
126 #:use-module (gnu packages textutils)
127 #:use-module (gnu packages time)
128 #:use-module (gnu packages tls)
129 #:use-module (gnu packages vim)
130 #:use-module (gnu packages web)
131 #:use-module (gnu packages xml)
132 #:use-module (gnu packages xorg)
133 #:use-module (srfi srfi-1)
134 #:use-module (ice-9 match))
135
;; ARAGORN: the build phase compiles the single, version-named C file directly
;; with gcc; the install phase copies the binary and its man page by hand.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The source file name embeds the release number.
             (let ((source-file (string-append "aragorn" ,version ".c")))
               (invoke "gcc" "-O3" "-ffast-math" "-finline-functions"
                       "-o" "aragorn" source-file))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (install-file "aragorn"
                             (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1")))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
180
;; BamM: Python 2 wrapper around a C library.  The C part is configured via
;; autogen.sh in the "c" sub-directory; compilation happens once, during
;; 'install', and the test suite is run afterwards against the installed
;; files.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM.git")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Freeze the build-time PATH into the wrapper so that the
             ;; external tools found at build time are found at run time.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; Prepend the installed site-packages directory to PYTHONPATH.
             ;; The previous code derived the "X.Y" part by slicing the last
             ;; five characters of the Python store path, which only works
             ;; for five-character versions such as "2.7.9"; for "2.7.17" it
             ;; produced ".7." and thus a non-existent directory.
             (add-installed-pythonpath inputs outputs)
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (invoke "nosetests")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)    ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
275
;; BamTools, built with CMake.  An rpath pointing at <out>/lib/bamtools is
;; handed to the linker through LDFLAGS before configuration.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib/bamtools")))
               (setenv "LDFLAGS" (string-append "-Wl,-rpath=" libdir)))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
309
;; BCFtools release tarball with the bundled htslib removed; configured with
;; GSL support and tested through the "test" make target.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags (list "--enable-libgsl")
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; The test driver hard-codes /bin/bash; point it at the bash
             ;; found in PATH instead.
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
353
;; BEDOPS has no configure script and is built with plain "make".  Its bundled
;; third-party tarballs are unpacked up front so that they are patched like
;; the rest of the source tree.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         '("jansson-2.6.tar.bz2"
                           "zlib-1.2.7.tar.bz2"
                           "bzip2-1.0.6.tar.bz2")))
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
413
;; Current bedtools release.  There is no configure script; the installation
;; prefix is passed to make directly.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.29.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("python" ,python-wrapper)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
449
;; Ribotaper expects fewer columns than the files produced by later
;; releases of bedtools, hence this old version.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy everything found under the build tree's "bin"
             ;; directory into <out>/bin/.
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "bin" ".*")))
             #t)))))))
476
;; This version is kept because pybedtools needs it.
(define-public bedtools-2.26
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0jhavwifnf7lmkb11h9y7dynr8d699h0rd2l52j1pfgircr2zwv5"))))))
490
;; pbbam, built with Meson.  Tests are disabled (see the TODO below); the
;; 'find-googletest phase still rewrites how tests/meson.build looks up
;; gtest_main.
(define-public pbbam
  (package
    (name "pbbam")
    (version "0.23.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: tests/pbbam_test cannot be linked
       ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
       ;;   undefined reference to symbol '_ZTIN7testing4TestE'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; There is no pkg-config file for gtest_main, so the
             ;; dependency lookup fails.  Ask the compiler to find the
             ;; library instead.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "tests/meson.build"
                 (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
546
;; blasr_libcpp, built with Meson.  Two phases patch the Meson files: one adds
;; the HDF5 libraries to the dependency list, the other changes how gtest_main
;; is looked up.  Tests are disabled (see the TODO below).
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr_libcpp.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;;   undefined reference to symbol
       ;;   '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend the two HDF5 libraries to the libblasr_deps list.
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" opening)
                  (string-append
                   opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; There is no pkg-config file for gtest_main, so the
             ;; dependency lookup fails.  Ask the compiler to find the
             ;; library instead.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "unittest/meson.build"
                 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
608
;; BLASR, built with Meson.  The HDF5 libraries are added to the dependency
;; list by patching meson.build; tests are disabled.
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
    (build-system meson-build-system)
    (arguments
     `(;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend the two HDF5 libraries to the blasr_deps list.
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" opening)
                  (string-append
                   opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("blasr-libcpp" ,blasr-libcpp)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
653
;; RiboTaper pipeline.  After installation each of the four entry-point
;; scripts is wrapped so that R_LIBS_SITE is set to its build-time value.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each
                (lambda (script)
                  ;; "=" replaces any R_LIBS_SITE present at run time.
                  (wrap-program (string-append bindir script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             #t)))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
698
;; RiboDiff, a Python 2 package.  setup.py is patched so that the TE.py script
;; is installed as an executable.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ratschlab/RiboDiff.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Keep the "packages=" line and add a "scripts=" line with the
             ;; same indentation right below it.
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole indent)
                (string-append whole "\n"
                               indent "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
743
;; Bioawk: plain Makefile build (no configure script, no tests) with a
;; hand-written install phase.
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (mandir (string-append prefix "/share/man/man1")))
               ;; The manual page is shipped as "awk.1"; install it under
               ;; the name of the program.
               (mkdir-p mandir)
               (copy-file "awk.1" (string-append mandir "/bioawk.1"))
               (install-file "bioawk" (string-append prefix "/bin")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
787
;; pybedtools: the Cython-generated C/C++ files shipped in the tarball are
;; removed and regenerated before the build, and the test suite is run from a
;; copy of the test directory outside the source tree.
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.8.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-1)
                  (srfi srfi-26)
                  (guix build utils)
                  (guix build python-build-system))
       ;; See https://github.com/daler/pybedtools/issues/192
       #:phases
       (modify-phases %standard-phases
         ;; See https://github.com/daler/pybedtools/issues/261
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; This test (pybedtools.test.test_scripts.test_venn_mpl) needs a
             ;; graphical environment.
             (substitute* "pybedtools/test/test_scripts.py"
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))
             #t))
         ;; TODO: Remove phase after it's part of PYTHON-BUILD-SYSTEM.
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; CYTHON-SOURCES holds the .pyx file names without their
             ;; extension; a C/C++ file is considered generated when its
             ;; extension-less name is in that list.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files))
               #t)))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         (replace 'check
           (lambda _
             ;; Locate the "lib*" and "scripts*" sub-directories of build/.
             (let* ((cwd (getcwd))
                    (build-root-directory (string-append cwd "/build/"))
                    (build (string-append
                            build-root-directory
                            (find (cut string-prefix? "lib" <>)
                                  (scandir (string-append
                                            build-root-directory)))))
                    (scripts (string-append
                              build-root-directory
                              (find (cut string-prefix? "scripts" <>)
                                    (scandir build-root-directory)))))
               (setenv "PYTHONPATH"
                       (string-append build ":" (getenv "PYTHONPATH")))
               ;; Executable scripts such as 'intron_exon_reads.py' must be
               ;; available in the PATH.
               (setenv "PATH"
                       (string-append scripts ":" (getenv "PATH"))))
             ;; The tests need to be run from elsewhere...
             (mkdir-p "/tmp/test")
             (copy-recursively "pybedtools/test" "/tmp/test")
             (with-directory-excursion "/tmp/test"
               (invoke "pytest")))))))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("kentutils" ,kentutils)         ; for bedGraphToBigWig
       ("python-six" ,python-six)
       ;; For the test suite.
       ("python-pytest" ,python-pytest)
       ("python-psutil" ,python-psutil)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
894
;; Python 2 variant of python-pybedtools.  It is derived with
;; 'package-with-python2' and gets python2-pathlib prepended to its native
;; inputs.
(define-public python2-pybedtools
  (let ((base (package-with-python2 python-pybedtools)))
    (package
      (inherit base)
      (native-inputs
       (cons `("python2-pathlib" ,python2-pathlib)
             (package-native-inputs base))))))
902
;; Utilities for the Biological Observation Matrix (BIOM) format.  The
;; source is taken from Git, pre-generated C files are removed in a snippet
;; and USE_CYTHON is set at build time.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete generated C files.
        ;; NOTE(review): the pattern is not anchored, so it matches any file
        ;; name containing ".c" (for instance "foo.cfg") -- confirm that this
        ;; is intended before tightening it.
        '(let ((generated-files (find-files "." "\\.c")))
           (for-each delete-file generated-files)
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-cython
           (lambda _
             (setenv "USE_CYTHON" "1")
             #t))
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Decorator line inserted, at the test's own indentation, in
             ;; front of each test definition that is to be skipped.
             (let ((skip-decorator "@npt.dec.skipif(True, msg='Guix')\n"))
               (substitute* "biom/tests/test_cli/test_validate_table.py"
                 (("^(.+)def test_invalid_hdf5" definition indentation)
                  (string-append indentation skip-decorator definition)))
               (substitute* "biom/tests/test_table.py"
                 (("^(.+)def test_from_hdf5_issue_731" definition indentation)
                  (string-append indentation skip-decorator definition))))
             #t))
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Give every matching file in the output mode 644 before the
             ;; 'reset-gzip-timestamps phase runs.
             (define (make-writable file)
               (chmod file #o644))
             (for-each make-writable
                       (find-files (assoc-ref outputs "out") "\\.gz"))
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The Python 2 variant is a separate definition, referenced lazily.
    (properties `((python2-variant . ,(delay python2-biom-format))))))
971
;; Python 2 variant of python-biom-format.  It inherits all phases of the
;; Python 3 package and adds one that edits "setup.py".
(define-public python2-biom-format
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-base)
      (arguments
       (substitute-keyword-arguments (package-arguments py2-base)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             ;; Do not require the unmaintained pyqi library.
             (add-after 'unpack 'remove-pyqi
               (lambda _
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                 #t)))))))))
986
;; BioPerl with a minimal set of inputs.  The input list is bound outside
;; the package record so that the names of its transitive target inputs can
;; be computed on the host side and spliced into the build-side
;; 'wrap-programs phase.
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         ;; Package names of the transitive target inputs of PERL-INPUTS,
         ;; with duplicates removed.
         (transitive-names
          (let ((closure (concatenate
                          (map (lambda (input)
                                 (package-transitive-target-inputs
                                  (cadr input)))
                               perl-inputs))))
            (map (lambda (input)
                   (package-name (cadr input)))
                 (delete-duplicates closure)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The commit is "release-" followed by the version with
               ;; every dot replaced by a dash.
               (commit (string-append
                        "release-"
                        (string-join (string-split version #\.) "-")))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((prefix-dir (assoc-ref outputs "out"))
                      (scripts-dir (string-append prefix-dir "/bin/"))
                      (input-dirs (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',transitive-names))
                      (perl5lib (string-join
                                 (cons (string-append prefix-dir
                                                      "/lib/perl5/site_perl")
                                       input-dirs)
                                 ":")))
                 (define (wrap-script file)
                   (wrap-program file
                     `("PERL5LIB" ":" prefix (,perl5lib))))
                 (for-each wrap-script
                           (find-files scripts-dir "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
1052
;; Biopython, fetched from PyPI.  The only customisation is a phase that
;; sets HOME before the test suite runs.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32
         "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license
     (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
1083
;; Python 2 variant of python-biopython, derived with 'package-with-python2'.
(define-public python2-biopython
  (package-with-python2
   python-biopython))
1086
;; Small FASTA/FASTQ parser fetched from PyPI; its tests are disabled.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "fastalite" version))
              (sha256
               (base32
                "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    ;; Test data is not distributed.
    (arguments
     `(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description
     "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
1106
;; Python 2 variant of python-fastalite, derived with 'package-with-python2'.
(define-public python2-fastalite
  (package-with-python2
   python-fastalite))
1109
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
        (revision "1"))
    (package
      (name "bpp-core")
      ;; "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         ;; NAME-VERSION-checkout
         (file-name (git-file-name name version))
         (sha256
          (base32
           "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
1139
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
        (revision "1"))
    (package
      (name "bpp-phyl")
      ;; "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         ;; NAME-VERSION-checkout
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       ;; If out-of-source, test data is not copied into the build directory
       ;; so the tests fail.
       `(#:parallel-build? #f
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
1173
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
        (revision "1"))
    (package
      (name "bpp-popgen")
      ;; "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         ;; NAME-VERSION-checkout
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       ;; There are no tests.
       `(#:parallel-build? #f
         #:tests? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
1205
(define-public bpp-seq
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
        (revision "1"))
    (package
      (name "bpp-seq")
      ;; "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         ;; NAME-VERSION-checkout
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       ;; If out-of-source, test data is not copied into the build directory
       ;; so the tests fail.
       `(#:parallel-build? #f
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
1238
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      ;; "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Each input needs its own label: labels are the keys used to look
       ;; inputs up, so bpp-popgen must not reuse the "bpp-phyl" label.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1276
;; NCBI BLAST+.  Bundled bzip2, zlib and pcre are removed from the source,
;; hard-coded tool paths in the build system are rewritten, and the result
;; is split over the "out", "lib" and "include" outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "1jlq0afxxgczpp35k6mxh8mn4jzq7vqcnaixk166sfj10wq8v9qh"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2, zlib and pcre.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are two(!) tests for this massive library, and both fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; $HOME needs to be set at some point during the configure phase
         (add-before 'configure 'set-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement for CMD: a fixed invocation for
             ;; "date", otherwise whatever 'which' finds.  When nothing is
             ;; found, print a warning and return #f.
             (define (which* cmd)
               (if (string=? cmd "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Files that mention tools by absolute path.
             (define build-system-files
               '("src/build-system/configure.ac"
                 "src/build-system/configure"
                 "src/build-system/helpers/run_with_lock.c"
                 "scripts/common/impl/if_diff.sh"
                 "scripts/common/impl/run_with_lock.sh"
                 "src/build-system/Makefile.configurables.real"
                 "src/build-system/Makefile.in.top"
                 "src/build-system/Makefile.meta.gmake=no"
                 "src/build-system/Makefile.meta.in"
                 "src/build-system/Makefile.meta_l"
                 "src/build-system/Makefile.meta_p"
                 "src/build-system/Makefile.meta_r"
                 "src/build-system/Makefile.mk.in"
                 "src/build-system/Makefile.requirements"
                 "src/build-system/Makefile.rules_with_autodep.in"))

             ;; Rewrite hardcoded paths to various tools
             (substitute* (append build-system-files
                                  (find-files "scripts/common/check"
                                              "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole dir cmd)
                ;; Keep the original text when no replacement is known.
                (or (which* cmd) whole)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S")
                (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out"))
                   (libdir (string-append (assoc-ref outputs "lib") "/lib"))
                   (includedir (string-append (assoc-ref outputs "include")
                                              "/include/ncbi-tools++")))
               ;; Build "FLAG<directory of input LABEL>".
               (define (with-input flag label)
                 (string-append flag (assoc-ref inputs label)))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" (getcwd) "/build")
                       (string-append "--prefix=" prefix)
                       (string-append "--libdir=" libdir)
                       (string-append "--includedir=" includedir)
                       (with-input "--with-bz2=" "bzip2")
                       (with-input "--with-z=" "zlib")
                       (with-input "--with-pcre=" "pcre")
                       ;; Each library is built twice by default, once
                       ;; with "-static" in its name, and again
                       ;; without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ;  21 MB
               "lib"                    ; 226 MB
               "include"))              ;  33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1427
;; BLESS error-correction tool.  Bundled boost, pigz, sparsehash and zlib
;; are removed from the source; the Makefile is told not to build pigz and
;; the paths of the kmc and pigz executables are patched into the program.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, zlib, and .git directory
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz directory is deleted by the source snippet, so
         ;; the Makefile must not try to build it.  This phase needs neither
         ;; inputs nor outputs.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Point the program at the kmc binary installed in this package's
         ;; output and at the pigz binary from the "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Install the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1512
;; Bowtie 2 read aligner (the variable is named 'bowtie'; Bowtie 1 is
;; 'bowtie1').  Built straight from the Makefile, without a configure step.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; replace BUILD_HOST and BUILD_TIME for deterministic build
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "allall"
               "WITH_TBB=1"
               (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run upstream's Perl test driver against the freshly built
         ;; executables in the current directory.
         (replace 'check
           (lambda _
             (let ((test-script "scripts/test/simple_tests.pl"))
               (invoke "perl"
                       test-script
                       "--bowtie2=./bowtie2"
                       "--bowtie2-build=./bowtie2-build"))
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1571
;; Bowtie 1 short read aligner, built straight from the Makefile without a
;; configure step; there is no test target.
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.2.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                                  version "/bowtie-src-x86_64.zip"))
              (sha256
               (base32
                "0vmiqdhc9dzyfy9sh6vgi7k9xy2hiw8g87vbamnc6cgpm179zsa4"))
              (modules '((guix build utils)))
              (snippet
               ;; Wrapped in 'begin' with an explicit #t, like the other
               ;; snippets in this file, instead of relying on the
               ;; unspecified return value of 'substitute*'.
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "all"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
1611
;; TopHat spliced read mapper.  The bundled SeqAn and samtools sources are
;; deleted and the build is redirected to the samtools found on PATH and
;; to the <samtools/...> headers.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32
         "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (for-each delete-file-recursively
                     '("src/SeqAn-1.4.2"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((samtools (which "samtools")))
               ;; Drop every reference to the bundled samtools from the
               ;; Makefile template.
               (substitute* "src/Makefile.in"
                 (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ kept) kept)
                 (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
                 (("SAMPROG = samtools_0\\.1\\.18") "")
                 (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
                 (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
               ;; Use the absolute file name of the samtools executable.
               (substitute* '("src/common.cpp"
                              "src/tophat.py")
                 (("samtools_0.1.18") samtools))
               ;; Quoted includes ...
               (substitute* '("src/common.h"
                              "src/bam2fastx.cpp")
                 (("#include \"bam.h\"") "#include <samtools/bam.h>")
                 (("#include \"sam.h\"") "#include <samtools/sam.h>"))
               ;; ... and angle-bracket includes.
               (substitute* '("src/bwt_map.h"
                              "src/map2gtf.h"
                              "src/align_status.h")
                 (("#include <bam.h>") "#include <samtools/bam.h>")
                 (("#include <sam.h>") "#include <samtools/sam.h>")))
             #t)))))
    (native-inputs
     `(("gcc" ,gcc-5)))                 ;; doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1679
;; BWA sequence aligner.  There is no configure script and no test target;
;; the 'install phase copies the built files into the output by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32
         "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Each entry is (FILE . DIRECTORY-BELOW-OUT).
               (for-each (lambda (entry)
                           (install-file (car entry)
                                         (string-append out (cdr entry))))
                         '(("bwa" . "/bin")
                           ("libbwa.a" . "/lib")
                           ("README.md" . "/share/doc/bwa")
                           ("bwa.1" . "/share/man/man1"))))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1728
;; BWA-PSSM.  Every field that is not overridden below (for instance the
;; build arguments) is inherited from 'bwa'.
(define-public bwa-pssm
  (package
    (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1758
;; bwa-meth: a Python wrapper script around BWA for bisulfite sequencing
;; reads.  The script is patched so that it calls `bwa' by absolute file name.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/brentp/bwa-meth.git")
                    ;; Upstream tags are prefixed with "v".
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "bwameth.py"
               ;; Replace "bwa mem" and "bwa index" invocations with the
               ;; full file name of the "bwa" executable found on PATH at
               ;; build time, keeping the sub-command.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") ""))
             #t)))))
    (inputs
     `(("bwa" ,bwa)))
    ;; NOTE(review): python-toolshed is only a native input here; if
    ;; bwameth.py imports it at run time it presumably needs to be a
    ;; propagated input instead -- confirm against upstream.
    (native-inputs
     `(("python-toolshed" ,python-toolshed)))
    (home-page "https://github.com/brentp/bwa-meth")
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common). It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads. It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read. It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich region.")
    (license license:expat)))
1799
;; bx-python, fetched from PyPI.  The test suite is disabled because the
;; PyPI archive does not ship the test data.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32 "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
    (build-system python-build-system)
    (arguments
     ;; Tests fail because test data are not included.
     `(#:tests? #f))
    (native-inputs
     `(("python-lzo" ,python-lzo)
       ("python-nose" ,python-nose)
       ("python-cython" ,python-cython)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1828
;; Python 2 variant of `python-bx-python', derived with
;; `package-with-python2'.
(define-public python2-bx-python
  (package-with-python2
   python-bx-python))
1831
;; Pysam, built against the htslib input.  The source snippet removes the
;; "htslib" directory, and the 'check phase is replaced by a custom nosetests
;; run.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.15.1")
    (source
     (origin
       ;; Test data is missing on PyPi, hence the git checkout.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pysam-developers/pysam.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop bundled htslib.
           ;; TODO: Also remove samtools and bcftools.
           (delete-file-recursively "htslib")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the build at the htslib from the inputs.
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR" (string-append htslib "/include")))
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             #t))
         (replace 'check
           (lambda _
             (for-each delete-file
                       '(;; This file contains tests that require a
                         ;; connection to the internet.
                         "tests/tabix_test.py"
                         ;; FIXME: This test fails
                         "tests/AlignmentFile_test.py"))
             ;; Add first subdirectory of "build" directory to PYTHONPATH.
             (let ((build-subdir
                    (car (scandir "build"
                                  (lambda (entry)
                                    (not (string-prefix? "." entry)))))))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (getcwd) "/build/" build-subdir)))
             ;; Step out of source dir so python does not import from CWD.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               (for-each (lambda (data-dir)
                           (invoke "make" "-C" data-dir))
                         '("pysam_data" "cbcf_data"))
               ;; Running nosetests without explicitly asking for a single
               ;; process leads to a crash.  Running with multiple processes
               ;; fails because the tests are not designed to run in
               ;; parallel.

               ;; FIXME: tests keep timing out on some systems.
               (invoke "nosetests" "-v" "--processes" "1")))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; The remaining native inputs are needed for the tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (inputs
     `(("ncurses" ,ncurses)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("htslib" ,htslib)))        ; Included from installed header files.
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1914
;; Python 2 variant of `python-pysam', derived with `package-with-python2'.
(define-public python2-pysam
  (package-with-python2
   python-pysam))
1917
;; twobitreader, fetched from its upstream git repository.  Tests are
;; disabled because the checkout does not include any.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             ;; Upstream tags are the bare version string.
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    (arguments
     ;; Tests are not included.
     `(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
1942
;; Python 2 variant of `python-twobitreader', derived with
;; `package-with-python2'.
(define-public python2-twobitreader
  (package-with-python2
   python-twobitreader))
1945
;; plastid, fetched from PyPI.  The test suite is disabled because some of
;; the test files are not part of the archive.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f))                    ; some test files are not included
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
1978
;; Python 2 variant of `python-plastid', derived with `package-with-python2'.
(define-public python2-plastid
  (package-with-python2
   python-plastid))
1981
;; TEToolkit (TEtranscripts and TEcount), built with Python 2.  External
;; commands invoked by the scripts are replaced by the file names that
;; `which' finds at build time, and the installed executables are wrapped so
;; that they see the build-time value of R_LIBS_SITE.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mhammell-laboratory/tetoolkit.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; not guaranteed to work with Python 3
       #:phases
       (modify-phases %standard-phases
         ;; The files of the checkout are made writable before they are
         ;; patched below.
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))
             #t))
         (add-after 'unpack 'patch-invocations
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Commands run by the two executables.
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (which "sort") " "))
               (("'rm -f ")
                (string-append "'" (which "rm") " -f "))
               (("'Rscript'") (string-append "'" (which "Rscript") "'")))
             ;; Commands run by the library modules.
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED") (which "bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (which "Rscript") "\"")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (let ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("TEtranscripts"
                  "TEcount")))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("bedtools" ,bedtools)
       ("python-argparse" ,python2-argparse)
       ("python-pysam" ,python2-pysam)
       ("r-minimal" ,r-minimal)
       ("r-deseq2" ,r-deseq2)))
    (home-page "https://github.com/mhammell-laboratory/tetoolkit")
    (synopsis "Transposable elements in differential enrichment analysis")
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
RNA-seq (and similar data) and annotates reads to both genes and transposable
elements. TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))
2047
;; CD-HIT, built straight from its Makefile (there is no "configure" script
;; and no test suite).  References to __DATE__ and __TIME__ are patched out
;; of the sources.
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       (list
        ;; Executables are copied directly to the PREFIX.
        (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
        ;; Support longer sequences (e.g. Pacbio sequences)
        "MAX_SEQ=60000000")
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin")))
             #t)))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
2098
;; CLIPper, built with Python 2.  A source snippet strips the
;; "setup_requires" line from setup.py, and a build phase deletes the lines
;; of readsToWiggle.pyx that start with "sc".
(define-public clipper
  (package
    (name "clipper")
    (version "1.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/YeoLab/clipper.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0fja1rj84wp9vpj8rxpj3n8zqzcqq454m904yp9as1w4phccirjb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove unnecessary setup dependency
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:phases
       (modify-phases %standard-phases
         ;; This is fixed in upstream commit
         ;; f6c2990198f906bf97730d95695b4bd5a6d01ddb.
         (add-after 'unpack 'fix-typo
           (lambda _
             (substitute* "clipper/src/readsToWiggle.pyx"
               (("^sc.*") ""))
             #t)))))
    (native-inputs
     ;; All of these are needed for the tests.
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (inputs
     `(("htseq" ,python2-htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
2150
;; CodingQuarry, built from its Makefile.  The custom 'install phase copies
;; the documentation, the "QuarryFiles" directory and the two programs into
;; the output; QUARRY_PATH is declared as a search path for "QuarryFiles".
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32 "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry"))
                    (quarry-files (string-append out "/QuarryFiles")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles" quarry-files)
               (for-each (lambda (program)
                           (install-file program bin))
                         '("CodingQuarry"
                           "CufflinksGTF_to_CodingQuarryGFF3.py")))
             #t)))))
    (native-inputs
     `(("python" ,python-2)))           ; Only Python 2 is supported
    (inputs
     `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
2191
;; COUGER is installed without a build step: the "src" directory and the
;; "couger" script are copied into the output, the script is patched to add
;; the output directory to sys.path, and the installed script is wrapped
;; with the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (src (string-append out "/src")))
               (copy-recursively "src" src)
               (mkdir bin)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (install-file "couger" bin))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((program (string-append (assoc-ref outputs "out")
                                           "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program program
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
2262
;; Clustal Omega, built with the default gnu-build-system phases from the
;; upstream release tarball.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
2285
;; CrossMap, fetched from PyPI and built with the default
;; python-build-system phases.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.3.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "CrossMap" version))
       (sha256
        (base32 "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (inputs
     `(("python-bx-python" ,python-bx-python)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
2313
;; dnaio, fetched from PyPI and built with the default python-build-system
;; phases.
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "dnaio" version))
              (sha256
               (base32
                "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-xopen" ,python-xopen)))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
2336
;; Cutadapt, fetched from PyPI and built with the default
;; python-build-system phases.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cutadapt" version))
       (sha256
        (base32 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (inputs
     `(("python-dnaio" ,python-dnaio)
       ("python-xopen" ,python-xopen)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
2361
;; libBigWig, built from its Makefile (the 'configure phase is deleted).
;; The compiler and the installation prefix are passed as make flags; the
;; tests are disabled because they need network access.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f                      ; tests require access to the web
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for tests
       ("python" ,python-2)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
2398
;; pyBigWig, fetched from PyPI.  The bundled "libBigWig" directory is deleted
;; by a source snippet and setup.py is patched to list "BigWig" among the
;; libraries to link with.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             ;; Prepend "BigWig" to the list of libraries in setup.py.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
2436
;; Python 2 variant of `python-pybigwig', derived with
;; `package-with-python2'.
(define-public python2-pybigwig
  (package-with-python2
   python-pybigwig))
2439
;; DendroPy, built with the default python-build-system phases from a git
;; checkout of the upstream repository.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source (origin
              ;; Source from GitHub so that tests are included.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/jeetsukumaran/DendroPy.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
    (build-system python-build-system)
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
2463
;; Python 2 variant of `python-dendropy'.  It adds a phase that renames one
;; test method so that it is no longer picked up, and keeps the arguments of
;; the `package-with-python2' result after the added #:phases.
(define-public python2-dendropy
  (let ((python2-variant (package-with-python2 python-dendropy)))
    (package
      (inherit python2-variant)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-failing-test
             (lambda _
               ;; This test fails when the full test suite is run, as
               ;; documented at
               ;; https://github.com/jeetsukumaran/DendroPy/issues/74
               (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                 (("test_collection_comments_and_annotations")
                  "do_not_test_collection_comments_and_annotations"))
               #t)))
         ,@(package-arguments python2-variant))))))
2480
;; py2bit, fetched from PyPI and built with the default python-build-system
;; phases.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "py2bit" version))
              (sha256
               (base32
                "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2499
;; deepTools, built from a git checkout with python-build-system.  The
;; 'reset-gzip-timestamps phase is removed from the standard phases.
(define-public deeptools
  (package
    (name "deeptools")
    (version "3.1.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/deepTools.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1vggnf52g6q2vifdl4cyi7s2fnfqq0ky2zrkj5zv2qfzsc3p3siw"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; This phase fails, but it's not needed.
         (delete 'reset-gzip-timestamps))))
    (native-inputs
     ;; All of these are needed for the tests.
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)
       ("python-pytz" ,python-pytz)))
    (inputs
     `(("python-plotly" ,python-plotly)
       ("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)))
    (home-page "https://github.com/deeptools/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
2544
;; Delly, built from its Makefile.  The "src/htslib" directory is removed
;; from the source by a snippet, and an extra phase copies the
;; "excludeTemplates" directory into share/delly/templates.
(define-public delly
  (package
    (name "delly")
    (version "0.7.9")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dellytools/delly.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "034jqsxswy9gqdh2zkgc1js99qkv75ks4xvzgmh0284sraagv61z"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "src/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags
       (list "PARALLEL=1"     ; Allow parallel execution at run-time.
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (add-after 'install 'install-templates
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (templates (string-append out "/share/delly/templates")))
               (mkdir-p templates)
               (copy-recursively "excludeTemplates" templates)
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2591
;; DIAMOND, built with cmake-build-system.  The "-march=native" flag is
;; stripped from CMakeLists.txt before the build.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.30")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bbuchfink/diamond.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
2625
;; Discrover motif finder.  Several phases below patch the LaTeX sources of
;; its manual and a couple of C++ headers before the CMake build runs.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/maaskola/discrover.git")
             (commit version)))
       (sha256
        (base32 "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))
       (file-name (git-file-name name version))))
    (build-system cmake-build-system)
    (native-inputs
     (list (list "texlive"
                 (texlive-union
                  (list texlive-fonts-cm
                        texlive-fonts-amsfonts

                        texlive-latex-doi
                        texlive-latex-examplep
                        texlive-latex-hyperref
                        texlive-latex-ms
                        texlive-latex-natbib
                        texlive-bibtex         ; style files used by natbib
                        texlive-latex-pgf      ; tikz
                        texlive-latex-verbatimbox)))
           (list "imagemagick" imagemagick)))
    (inputs
     (list (list "boost" boost)
           (list "cairo" cairo)
           (list "rmath-standalone" rmath-standalone)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-latex-errors
           (lambda _
             ;; The bibliography is edited with the default port encoding
             ;; unset.
             (with-fluids ((%default-port-encoding #f))
               (substitute* "doc/references.bib"
                 (("\\{S\\}illanp[^,]+,")
                  "{S}illanp{\\\"a}{\\\"a},")))
             ;; XXX: pdflatex keeps complaining about these characters.
             ;; They reach the manual through the generated
             ;; discrover-cli-help.txt, so spell them out in ASCII.
             (substitute* "src/hmm/cli.cpp"
               (("µ") "mu")
               (("η") "eta")
               (("≤") "<="))
             ;; The optional argument looks like a syntax error; drop it.
             (substitute* "doc/discrover-manual.tex"
               (("theverbbox\\[t\\]") "theverbbox"))
             #t))
         ;; Append "#include <random>" right after the include guard of two
         ;; headers.
         (add-after 'unpack 'add-missing-includes
           (lambda _
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t))
         ;; FIXME: needed because texlive-union does not handle fonts
         ;; correctly: it expects to be able to generate fonts in the home
         ;; directory.
         (add-before 'build 'setenv-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))
       ;; There are no tests.
       #:tests? #f))
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (license license:gpl3+)))
2698
;; EIGENSOFT population-genetics tools, compiled from the "src" directory of
;; the upstream checkout once the bundled binaries have been removed.
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DReichLab/EIG.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
       (modules '((guix build utils)))
       ;; Remove pre-built binaries, leaving an empty "bin" behind.
       (snippet
        '(begin
           (delete-file-recursively "bin")
           (mkdir "bin")
           #t))))
    (build-system gnu-build-system)
    (inputs
     (list (list "gsl" gsl)
           (list "lapack" lapack)
           (list "openblas" openblas)
           (list "perl" perl)
           (list "gfortran" gfortran "lib")))
    (arguments
     `(#:make-flags (list "CC=gcc")
       #:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         ;; Nothing to configure; just enter the sub-directory that holds
         ;; the Makefile.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; The provided install target only copies executables to the
         ;; "bin" directory in the build root, so install them from there.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "../bin" ".*"))
               #t))))))
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    (home-page "https://github.com/DReichLab/EIG")
    ;; The license of the eigensoft tools is Expat, but since it's
    ;; linking with the GNU Scientific Library (GSL) the effective
    ;; license is the GPL.
    (license license:gpl3+)))
2757
;; Entrez Direct front-end scripts.  The "xtract" and "rchive" commands are
;; symlinks to binaries provided by the "edirect-go-programs" input.
(define-public edirect
  (package
    (name "edirect")
    (version "13.3.20200128")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                           "/versions/" version
                           "/edirect-" version ".tar.gz"))
       (sha256
        (base32 "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file "Mozilla-CA.tar.gz")
           (substitute* "rchive.go"
             ;; This go library does not have any license.
             (("github.com/fiam/gounidecode/unidecode")
              "golang.org/rainycape/unidecode"))
           #t))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (delete 'check)                ; simple check after install
         (add-after 'unpack 'patch-programs
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ignore errors about missing xtract.Linux and rchive.Linux.
             (substitute* "pm-refresh"
               (("cat \\\"\\$target")
                "grep ^[[:digit:]] \"$target"))
             #t))
         ;; Copy the scripts into "bin" and link in the two Go programs.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin"))
                   (go-programs (assoc-ref inputs "edirect-go-programs")))
               (for-each
                (lambda (script)
                  (install-file script target))
                '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
                  "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
                  "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
                  "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
                  "pm-index" "pm-invert" "pm-merge" "pm-promote"))
               (symlink (string-append go-programs "/bin/xtract.Linux")
                        (string-append target "/xtract"))
               (symlink (string-append go-programs "/bin/rchive.Linux")
                        (string-append target "/rchive")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure everything can run in a pure environment: every
             ;; installed file gets PERL5LIB and PATH prefixed.
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (tool-dirs (map (lambda (tool)
                                      (dirname (which tool)))
                                    '("sed" "gzip" "grep" "perl" "uname")))
                    (search-dirs (cons (string-append out "/bin")
                                       tool-dirs)))
               (for-each
                (lambda (program)
                  (wrap-program program
                    (list "PERL5LIB" ":" 'prefix (list perl5lib)))
                  (wrap-program program
                    (list "PATH" ":" 'prefix search-dirs)))
                (find-files out ".")))
             #t))
         ;; Smoke test of the installed, wrapped entry point.
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((edirect.pl (string-append (assoc-ref outputs "out")
                                              "/bin/edirect.pl")))
               (invoke edirect.pl "-filter" "-help"))
             #t)))))
    (inputs
     (list (list "edirect-go-programs" edirect-go-programs)
           (list "perl-html-parser" perl-html-parser)
           (list "perl-encode-locale" perl-encode-locale)
           (list "perl-file-listing" perl-file-listing)
           (list "perl-html-tagset" perl-html-tagset)
           (list "perl-html-tree" perl-html-tree)
           (list "perl-http-cookies" perl-http-cookies)
           (list "perl-http-date" perl-http-date)
           (list "perl-http-message" perl-http-message)
           (list "perl-http-negotiate" perl-http-negotiate)
           (list "perl-lwp-mediatypes" perl-lwp-mediatypes)
           (list "perl-lwp-protocol-https" perl-lwp-protocol-https)
           (list "perl-net-http" perl-net-http)
           (list "perl-uri" perl-uri)
           (list "perl-www-robotrules" perl-www-robotrules)
           (list "perl-xml-simple" perl-xml-simple)
           (list "perl" perl)))
    (native-search-paths
     ;; Ideally this should be set for LWP somewhere.
     (list (search-path-specification
            (variable "PERL_LWP_SSL_CA_FILE")
            (file-type 'regular)
            (separator #f)
            (files '("/etc/ssl/certs/ca-certificates.crt")))))
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (license license:public-domain)))
2873
;; The Go programs shipped in the edirect source, built with the Go build
;; system.  Everything not overridden here is inherited from "edirect".
(define-public edirect-go-programs
  (package
    (inherit edirect)
    (name "edirect-go-programs")
    (build-system go-build-system)
    (arguments
     `(#:import-path "ncbi.nlm.nih.gov/entrez/edirect"
       #:install-source? #f
       #:tests? #f                      ; No tests.
       #:phases
       (modify-phases %standard-phases
         ;; Run one "go build -v -x" per program from within the import
         ;; path; each entry below is the tail of one command line.
         (replace 'build
           (lambda* (#:key import-path #:allow-other-keys)
             (with-directory-excursion (string-append "src/" import-path)
               (for-each
                (lambda (build-args)
                  (apply invoke "go" "build" "-v" "-x" build-args))
                '(("j2x.go")
                  ("t2x.go")
                  ("-o" "xtract.Linux" "xtract.go" "common.go")
                  ("-o" "rchive.Linux" "rchive.go" "common.go")
                  ("-o" "symbols.Linux" "s2p.go")))
               #t)))
         ;; Install the five resulting executables into "bin".
         (replace 'install
           (lambda* (#:key outputs import-path #:allow-other-keys)
             (let ((build-dir (string-append "src/" import-path "/"))
                   (target (string-append (assoc-ref outputs "out") "/bin")))
               (for-each
                (lambda (program)
                  (format #t "installing ~a~%" program)
                  (install-file (string-append build-dir program) target))
                '("j2x" "t2x" "symbols.Linux" "xtract.Linux" "rchive.Linux"))
               #t))))))
    (native-inputs '())
    (propagated-inputs '())
    (inputs
     (list (list "go-github-com-fatih-color" go-github-com-fatih-color)
           (list "go-github-com-fogleman-gg" go-github-com-fogleman-gg)
           (list "go-github-com-gedex-inflector" go-github-com-gedex-inflector)
           (list "go-github-com-golang-freetype" go-github-com-golang-freetype)
           (list "go-github-com-klauspost-cpuid" go-github-com-klauspost-cpuid)
           (list "go-github-com-pbnjay-memory" go-github-com-pbnjay-memory)
           (list "go-github-com-surgebase-porter2"
                 go-github-com-surgebase-porter2)
           (list "go-golang-org-rainycape-unidecode"
                 go-golang-org-rainycape-unidecode)
           (list "go-golang-org-x-image" go-golang-org-x-image)
           (list "go-golang-org-x-text" go-golang-org-x-text)))))
2917
;; Exonerate pairwise sequence aligner, built from the release tarball with
;; the GNU build system.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; Grammar fix: the text used to read "using a many alignment models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
2947
;; eXpress quantification tool.  The CMake rules are patched to link against
;; shared Boost and protobuf libraries and to use the "bamtools" input.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://bio.math.berkeley.edu/eXpress/downloads/express-"
         version "/express-" version "-src.tgz"))
       (sha256
        (base32 "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (inputs
     (list (list "boost" boost)
           (list "bamtools" bamtools)
           (list "protobuf" protobuf)
           (list "zlib" zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Top-level rules: shared Boost, bamtools headers from the
               ;; input rather than from the source tree.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; Link rules: bamtools libraries from the input, and the
               ;; shared protobuf library instead of the static archive.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so"))
               #t))))
       #:tests? #f))                    ;no "check" target
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (license license:artistic2.0)))
2992
;; Express Beta Diversity, built with plain "make" from the "source"
;; sub-directory of the upstream checkout.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dparks1134/ExpressBetaDiversity.git")
             (commit (string-append "v" version))))
       (sha256
        (base32 "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))
       (file-name (git-file-name name version))))
    (build-system gnu-build-system)
    (inputs
     (list (list "python" python-2)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; The check consists of running the freshly built binary with "-u".
         (replace 'check
           (lambda _
             (invoke "../bin/ExpressBetaDiversity" "-u")
             #t))
         ;; Install the conversion script and the binary into "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (file)
                           (install-file file target))
                         '("../scripts/convertToEBD.py"
                           "../bin/ExpressBetaDiversity"))
               #t))))))
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
    (license license:gpl3+)))
3030
;; FastTree is distributed as a single C file, so there is nothing to unpack
;; or configure: the build phase compiles the downloaded source directly,
;; once as "FastTree" and once with OpenMP enabled as "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Flags shared by both variants; kept in one place so the two
             ;; command lines cannot drift apart.
             (let ((common-flags '("-O3"
                                   "-finline-functions"
                                   "-funroll-loops"
                                   "-Wall")))
               ;; Serial build.
               (apply invoke "gcc"
                      (append common-flags
                              (list "-o" "FastTree" source "-lm")))
               ;; OpenMP build.
               (apply invoke "gcc" "-DOPENMP" "-fopenmp"
                      (append common-flags
                              (list "-o" "FastTreeMP" source "-lm")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    ;; Grammar fix: the text used to read "a million of sequences".
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
3086
;; FASTX-Toolkit, built from the release tarball with the default phases of
;; the GNU build system and GCC 6 as an extra native input.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/agordon/fastx_toolkit/releases/download/"
         version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "gcc" gcc-6)           ;doesn't build with later versions
           (list "pkg-config" pkg-config)))
    (inputs
     (list (list "libgtextutils" libgtextutils)))
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (license license:agpl3+)))
3119
;; Flexbar read preprocessor, built with CMake.  The test script and the
;; installation are driven by hand in the phases below.
(define-public flexbar
  (package
    (name "flexbar")
    (version "3.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/seqan/flexbar.git")
             (commit (string-append "v" version))))
       (sha256
        (base32 "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))
       (file-name (git-file-name name version))))
    (build-system cmake-build-system)
    (native-inputs
     (list (list "pkg-config" pkg-config)
           (list "seqan" seqan)))
    (inputs
     (list (list "tbb" tbb)
           (list "zlib" zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-tune-to-CPU
           (lambda _
             (substitute* "src/CMakeLists.txt"
               ((" -march=native") ""))
             #t))
         ;; Put the current directory first on PATH, then run the upstream
         ;; test script from the source tree.
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((here (getcwd)))
               (setenv "PATH" (string-append here ":" (getenv "PATH"))))
             (with-directory-excursion "../source/test"
               (invoke "bash" "flexbar_test.sh"))
             #t))
         ;; Only the "flexbar" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "flexbar"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (home-page "https://github.com/seqan/flexbar")
    (license license:bsd-3)))
3170
;; FragGeneScan gene predictor.  The wrapper script and the C sources are
;; patched so that the training files are looked up under
;; share/fraggenescan/train in the output, and the package is checked after
;; installation by running two of the bundled examples.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Ends with a slash: both substitutions below append
                    ;; "train/" directly to this prefix.
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Use absolute file names for the tools run via system().
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX.  Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 ;; SHARE already ends with a slash, so append "train/"
                 ;; rather than "/train/" to avoid a doubled slash.
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _
             (invoke "make" "clean")
             (invoke "make" "fgs")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share))
             #t))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check that
           ;; the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               ;; Test complete genome.
               (invoke frag
                       "-genome=./example/NC_000913.fna"
                       "-out=./test2"
                       "-complete=1"
                       "-train=complete")
               (unless (and (file-exists? "test2.faa")
                            (file-exists? "test2.ffn")
                            (file-exists? "test2.gff")
                            (file-exists? "test2.out"))
                 (error "Expected files do not exist."))
               ;; Test incomplete sequences.
               (invoke frag
                       "-genome=./example/NC_000913-fgs.ffn"
                       "-out=out"
                       "-complete=0"
                       "-train=454_30")
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
3260
;; fxtract sequence extractor.  The "util" directory of the checkout is
;; replaced at build time with a pinned checkout of ctSkennerton's "util"
;; repository, identified by UTIL-COMMIT.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract.git")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Swap the "util" directory for the separately fetched sources.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             ;; Spelling fixed: this used to read "ctstennerton-util-".
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
3325
;; GEMMA association-study tool, built with plain "make".  The make flags
;; are selected on the host side from the current system string.
(define-public gemma
  (package
    (name "gemma")
    (version "0.98")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/xiangzhou/GEMMA.git")
             (commit (string-append "v" version))))
       (sha256
        (base32 "1s3ncnbn45r2hh1cvrqky1kbqq6546biypr4f5mkw1kqlrgyh0yg"))
       (file-name (git-file-name name version))))
    (build-system gnu-build-system)
    (inputs
     (list (list "eigen" eigen)
           (list "gfortran" gfortran "lib")
           (list "gsl" gsl)
           (list "lapack" lapack)
           (list "openblas" openblas)
           (list "zlib" zlib)))
    (arguments
     `(#:tests? #f                      ; no tests included yet
       ;; FORCE_DYNAMIC=1 is always passed; the second flag, if any, depends
       ;; on the system being built for.
       #:make-flags
       (list "FORCE_DYNAMIC=1"
             ,@(match (%current-system)
                 ("x86_64-linux" '())
                 ("i686-linux" '("FORCE_32BIT=1"))
                 (_ '("NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'find-eigen
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ensure that Eigen headers can be found
             (let ((eigen-headers (string-append (assoc-ref inputs "eigen")
                                                 "/include/eigen3")))
               (setenv "CPLUS_INCLUDE_PATH" eigen-headers))
             #t))
         ;; Create the "bin" directory before the build starts.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         ;; Only the "gemma" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "bin/gemma"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (home-page "https://github.com/xiangzhou/GEMMA")
    (license license:gpl3)))
3385
;; GRIT transcript modelling tool, a Python 2 package whose Cython-generated
;; C files are deleted so that they are regenerated from the .pyx sources.
(define-public grit
  (package
    (name "grit")
    (version "2.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/grit.git")
             (commit version)))
       (sha256
        (base32 "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))
       (file-name (git-file-name name version))))
    (build-system python-build-system)
    (native-inputs
     (list (list "python-cython" python2-cython)))
    (inputs
     (list (list "python-scipy" python2-scipy)
           (list "python-numpy" python2-numpy)
           (list "python-pysam" python2-pysam)
           (list "python-networkx" python2-networkx)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Delete these C files to force fresh generation from pyx
             ;; sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             ;; Point setup.py at "Cython.Build" instead of "Cython.Setup".
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build"))
             #t)))))
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    ;; The canonical <http://grit-bio.org> home page times out as of 2020-01-21.
    (home-page "https://github.com/nboley/grit")
    (license license:gpl3+)))
3430
;; HISAT spliced aligner, built with plain "make" from the upstream zip
;; archive; the Makefile is patched for a deterministic build.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32 "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "unzip" unzip)))
    (inputs
     (list (list "perl" perl)
           (list "python" python)
           (list "zlib" zlib)))
    (arguments
     `(#:make-flags
       (list "allall"
             ;; Disable unsupported `popcnt' instructions on
             ;; architectures other than x86_64
             ,@(if (string-prefix? "x86_64"
                                   (or (%current-target-system)
                                       (%current-system)))
                   '()
                   '("POPCNT_CAPABILITY=0")))
       #:tests? #f                      ;no check target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             ;; Replace "/usr/bin/env" with the file name found by `which'.
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         ;; Install every file whose name matches the hisat program pattern.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs
                    (find-files
                     "."
                     "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
    (license license:gpl3+)))
3497
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
                           "/downloads/hisat2-" version "-source.zip"))
       (sha256
        (base32 "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)                 ; the source is shipped as a zip archive
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags '("CC=gcc" "CXX=g++" "allall")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Substitute a constant for the `date` command substitution in
             ;; the Makefile.
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         ;; Install by hand: copy every file whose name matches the hisat2
         ;; program pattern to bin/, and the HTML manual to the doc directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir)
               #t))))))
    (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; The HISAT2 sources themselves are under GPLv3 or later; the files
    ;; taken from Bowtie2 are under GPLv2 or later.
    (license license:gpl3+)))
3555
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/hmmer/"
                           "hmmer-" version ".tar.gz"))
       (sha256
        (base32 "171bivy6xhgjsz5nv53n81pc3frnwz29ylblawk2bv46szwjjqd5"))))
    (build-system gnu-build-system)
    ;; The code relies on SSE intrinsics, which are not portable, so the
    ;; build fails on any platform other than the ones listed here.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (native-inputs
     `(("perl" ,perl)))
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (home-page "http://hmmer.org/")
    (license license:bsd-3)))
3581
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32 "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Propagate numpy: it is needed whenever htseq is imported as a Python
    ;; library.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (home-page "https://htseq.readthedocs.io/")
    (license license:gpl3+)))
3607
;; Variant of htseq obtained by applying `package-with-python2'.
(define-public python2-htseq
  (package-with-python2
   htseq))
3610
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete the pre-built binaries under "lib" and leave an
               ;; empty directory in their place.
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Internet access
       #:jdk ,icedtea-8
       ;; Point the "dist" property at the output directory.
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3660
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk.git")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         ;; Delete the build.xml that comes with the checkout.
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3703
3704 ;; This is needed for picard 2.10.3
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk.git")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32 "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "htsjdk.jar"
       #:jdk ,icedtea-8
       #:tests? #f                      ; tests require Scala
       #:phases
       (modify-phases %standard-phases
         ;; Delete the build.xml that comes with the checkout.
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The scalatest package would be needed to run the tests, so the
         ;; test sources are removed.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
3730
3731 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
3732 ;; recent version of java-htsjdk, which depends on gradle.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the pre-built binaries, leaving an empty "lib".
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Do not fail when git cannot be run at build time.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Drop the targets that compile htsjdk; ours is used instead.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Make picard-jar depend on picard-lib-jar so that
             ;; picard-lib.jar is built before picard.jar.
             (("name=\"picard-jar\" depends=\"" matched)
              (string-append matched "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         ;; Replace references to "${htsjdk}/lib" in build.xml with the jar
         ;; directory of the java-htsjdk input.
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk (assoc-ref inputs "java-htsjdk")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib")
                  (string-append htsjdk "/share/java/htsjdk/")))
               #t)))
         ;; Remove "compile" from the front of the test target's
         ;; dependency list.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3810
3811 ;; This is needed for dropseq-tools
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (arguments
     `(#:jar-name "picard.jar"
       #:main-class "picard.cmdline.PicardCommandLine"
       #:jdk ,icedtea-8
       ;; Tests require jacoco:coverage.
       #:tests? #f
       ;; The sxml modules are needed by the manifest-editing phase below.
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; Rewrite build.xml so that the target named "manifest" also
         ;; appends a Class-Path entry for htsjdk to the manifest file.
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             (chmod "build.xml" #o664)
             ;; Parse build.xml, transform it, and write the result to a
             ;; new file that replaces the original afterwards.
             (call-with-output-file "build.xml.new"
               (lambda (out-port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `((target . ,(lambda (tag . children)
                                  (let ((target-names ((sxpath '(name *text*))
                                                       (car children)))
                                        ;; FIXME: We're breaking the line
                                        ;; early with a dummy path to
                                        ;; ensure that the store reference
                                        ;; isn't broken apart and can still
                                        ;; be found by the reference
                                        ;; scanner.
                                        (class-path-entry (format #f
                                                     "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                                     ;; maximum line length is 70
                                                     (string-tabulate (const #\b) 57)
                                                     (assoc-ref inputs "java-htsjdk"))))
                                    (if (member "manifest" target-names)
                                        ;; Append two extra tasks to the
                                        ;; "manifest" target.
                                        `(,tag ,@children
                                               (replaceregexp
                                                (@ (file "${manifest.file}")
                                                   (match "\\r\\n\\r\\n")
                                                   (replace "${line.separator}")))
                                               (echo
                                                (@ (message ,class-path-entry)
                                                   (file "${manifest.file}")
                                                   (append "true"))))
                                        ;; Every other target is passed
                                        ;; through unchanged.
                                        `(,tag ,@children)))))
                     (*default* . ,(lambda (tag . children) `(,tag ,@children)))
                     (*text* . ,(lambda (_ text) text))))
                  out-port)))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3900
3901 ;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  (package
    (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard.git")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the pre-built binaries, leaving an empty "lib".
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       ;; This is only used for tests.
       #:make-flags
       '("-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2: refer to the jar of the "ant"
         ;; input instead.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((ant (assoc-ref inputs "ant")))
               (substitute* "build.xml"
                 (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
                  (string-append ant "/lib/ant.jar")))
               #t)))
         ;; Make "test" depend on "compile-tests" rather than "compile",
         ;; and stop "compile" from depending on "compile-tests".
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Replace the null default of "intel_deflater_so_path" with the
         ;; location where the JNI library gets installed.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (substitute* "src/java/net/sf/samtools/Defaults.java"
                 (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                  (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                                 out
                                 "/lib/jni/libIntelDeflater.so"
                                 "\")")))
               #t)))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one. This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (for-each mkdir-p '("lib/jni" "jdk-src"))
             ;; Unpack the JDK sources into "jdk-src".
             (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                     "-xf" (assoc-ref inputs "jdk-src"))
             ;; Generate the JNI header into "lib/".
             (invoke "javah" "-jni"
                     "-classpath" "classes"
                     "-d" "lib/"
                     "net.sf.samtools.util.zip.IntelDeflater")
             ;; Compile and link the shared library.
             (with-directory-excursion "src/c/inteldeflater"
               (let ((jdk-include (string-append "-I" (assoc-ref inputs "jdk")
                                                 "/include/linux")))
                 (invoke "gcc" "-I../../../lib" "-I."
                         jdk-include
                         "-I../../../jdk-src/src/share/native/common/"
                         "-I../../../jdk-src/src/solaris/native/common/"
                         "-c" "-O3" "-fPIC" "IntelDeflater.c")
                 (invoke "gcc" "-shared"
                         "-o" "../../../lib/jni/libIntelDeflater.so"
                         "IntelDeflater.o" "-lz" "-lstdc++")))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "ant" "all" make-flags)
             #t))
         ;; JAVA6_HOME is given the same value as JAVA_HOME.
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (jni-dir (string-append out "/lib/jni")))
               (mkdir-p jni-dir)
               (install-file "lib/jni/libIntelDeflater.so" jni-dir)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant)                     ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ;; The "jdk-drop" native input of icedtea-8 is reused as the JDK
       ;; source archive.
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
4018
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Replace the jar file names mentioned in build.xml with the
         ;; absolute file names of the jars provided by the inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (string-append (assoc-ref inputs "java-jbzip2")
                               "/share/java/jbzip2.jar"))
               (("sam-1.103.jar")
                (string-append (assoc-ref inputs "java-picard-1.113")
                               "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (string-append (assoc-ref inputs "java-cisd-jhdf5")
                               "/share/java/sis-jhdf5.jar")))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; SHARE has no trailing slash, so that EXE (and with it the
             ;; symlink target below) does not contain a doubled "/".
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/fastqc"))
                    (exe (string-append share "/fastqc")))
               (for-each mkdir-p (list bin share))
               ;; Copy the contents of the "bin" build directory to SHARE.
               (copy-recursively "bin" share)
               ;; Make the wrapper script use the java executable from the
               ;; "java" input.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               ;; Expose the wrapper script under bin/.
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
4097
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; there are none
       ;; Pass the bin directory of the output as BINDIR.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the bin directory before the install phase runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))
               #t))))))
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (home-page "https://github.com/OpenGene/fastp/")
    (license license:expat)))
4133
(define-public htslib
  (package
    (name "htslib")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("curl" ,curl)
       ("openssl" ,openssl)))
    ;; zlib is propagated because the pkg-config file names it as a required
    ;; library.
    (propagated-inputs
     `(("zlib" ,zlib)))
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    (home-page "https://www.htslib.org")
    ;; The Expat license covers everything except the files under cram/,
    ;; which are released under the modified BSD license.
    (license (list license:expat license:bsd-3))))
4164
4165 ;; This package should be removed once no packages rely upon it.
(define htslib-1.3
  ;; Same as htslib, except for the version and the source tarball.
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
4178
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       (snippet
        ;; Remove the C file that was generated ahead of time.
        '(begin
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    ;; Tests are disabled: the only test, "test_inv_cdf.py", exercises
    ;; features that are no longer part of this package, and it asserts
    ;; False, so it always fails.
    (arguments `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (home-page "https://github.com/nboley/idr")
    (license license:gpl2+)))
4214
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(;; Each language binding is enabled with its own output as the
       ;; flag's value.
       #:configure-flags
       (map (lambda (language)
              (string-append "--enable-" language "-binding="
                             (assoc-ref %outputs language)))
            '("ruby" "python"))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; The tests are run through either /bin/sh or $SHELL (see
             ;; generator_manager.hpp), so point SHELL at bash.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; As a whole the work is under GPLv3 or later; some individual files,
    ;; such as lib/jsoncpp.cpp, are under the Expat license.
    (license (list license:gpl3+ license:expat))))
4268
(define-public khmer
  (package
    (name "khmer")
    (version "3.0.0a3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled copies of zlib, bzip2 and seqan; packages
           ;; for all three are listed among the inputs.
           ;;
           ;; We do not replace the bundled MurmurHash as the canonical
           ;; repository for this code 'SMHasher' is unsuitable for providing
           ;; a library. See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (for-each delete-file-recursively
                     '("third-party/zlib"
                       "third-party/bzip2"
                       "third-party/seqan"))
           ;; Uncomment the "libraries" line and drop the bundled
           ;; directories from the include setting.
           (substitute* "setup.cfg"
             (("# libraries = z,bz2")
              "libraries = z,bz2")
             (("include:third-party/zlib:third-party/bzip2")
              "include:"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; The 'reset-gzip-timestamps' phase can only do its work on
         ;; writable files, so make every .gz file in the output writable
         ;; first.
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (archives (find-files out "\\.gz$")))
               (for-each make-file-writable archives)
               #t))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("seqan" ,seqan-1)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; Building on i686, armhf and mips64el stops with this error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
4338
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The build is run from within the "src" directory.
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               ;; Step back out of "src" and copy the "bin" directory that
               ;; sits next to it.
               (chdir "..")
               (copy-recursively "bin" bindir)
               #t))))))
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (home-page "http://kaiju.binf.ku.dk/")
    (license license:gpl3+)))
4376
(define-public macs
  (package
    (name "macs")
    (version "2.2.6")
    (source (origin
              ;; The PyPi tarball does not contain tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/taoliu/MACS.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1c5gxr0mk6hkd4vclf0k00wvyvzw2vrmk52c85338p7aqjwg6n15"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Move the 'check phase after 'install so that pytest runs against
         ;; the installed modules.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honor #:tests? so that the test suite can be switched off
             ;; through the package arguments like any standard 'check phase.
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v"))
             #t)))))
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcription factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
4414
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile and sources patched below live in "core".
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name the
               ;; same neighbours ("tbfast" and "f2cl") as the replacement,
               ;; otherwise it never matches and mafft-profile is kept.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; comment out the clean-up of the programs we no longer build
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace references to perl, awk and grep with the absolute
             ;; file names found on PATH at build time.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; No trailing colon here: wrap-program adds the
                    ;; separator itself, and an empty PATH entry would put
                    ;; the current directory on the search path.
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin")))
               ;; Prefix PATH with coreutils for every installed program.
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
4500
(define-public mash
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled copy of kseq.h.
           ;; TODO: Also delete bundled murmurhash and open bloom filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       ;; Tell configure where capnproto and gsl are located.
       #:configure-flags
       (map (lambda (option input)
              (string-append option (assoc-ref %build-inputs input)))
            '("--with-capnp=" "--with-gsl=")
            '("capnproto" "gsl"))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; The bundled kseq.h is deleted in the source snippet, so include
         ;; the header under the "htslib/" prefix instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; capnproto 0.7 requires c++14 to build
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application. Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (home-page "https://mash.readthedocs.io")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
4566
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
             (string-append "BOOST_ROOT=" (assoc-ref %build-inputs "boost")))
       #:tests? #f             ; Tests are run during the build phase.
       #:phases
       (modify-phases %standard-phases
         ;; Include the bam, sam and kseq headers under the "samtools/" and
         ;; "htslib/" prefixes instead of "bam/".
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Point the SConstruct file at the htslib and samtools inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-dir (assoc-ref inputs "htslib"))
                   (samtools-dir (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib-dir "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools-dir "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools-dir
                                 "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
4639
(define-public minced
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                    (launcher (string-append bin "/minced")))
               ;; Minced comes with a wrapper script that tries to figure out
               ;; where it is located before running the JAR.  Since these
               ;; paths are known to us, we build our own wrapper to avoid
               ;; coreutils dependency.
               (install-file "minced.jar" bin)
               ;; Write a two-line shell launcher that runs the JAR with the
               ;; "jre" input's java and forwards all arguments.
               (call-with-output-file launcher
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin "/minced.jar \"$@\"\n")
                    port)))
               ;; Read and execute permission for everyone, no write bit.
               (chmod launcher #o555)
               #t))))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (home-page "https://github.com/ctSkennerton/minced")
    (license license:gpl3+)))
4696
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32 "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Use setuptools, or else the executables are not
             ;; installed.
             (("distutils.core") "setuptools")
             ;; Use "gcc" instead of "cc" for compilation.  The call is
             ;; inserted in front of the line that starts with "defines".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (home-page "https://www.genes.mit.edu/burgelab/miso/index.html")
    (license license:gpl2)))
4744
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (sha256
        (base32 "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; There are no tests, so just test if it runs.
         (replace 'check
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         ;; Copy the freshly built executable into the output's "bin"
         ;; directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    (home-page "http://www.drive5.com/muscle")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
4779
(define-public newick-utils
  ;; There are no recent releases so we package from git.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      ;; The version ends with the first eight characters of the commit.
      (version (string-append "1.6-1." (substring commit 0 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4813
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarball; the version appears in both the tag and the
       ;; file name.
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4838
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32 "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; pbcore < 2.0 requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-sphinx-dependency
           (lambda _
             ;; Sphinx is only required for documentation tests, which
             ;; we do not run; furthermore it depends on python2-sphinx
             ;; which is no longer maintained.
             (substitute* "requirements-dev.txt"
               (("^sphinx") ""))
             #t)))))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (home-page "https://pacificbiosciences.github.io/pbcore/")
    (license license:bsd-3)))
4875
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       ;; The PyPI release is a zip archive, hence the "unzip" native input.
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; requires Python 2.7
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (home-page "https://github.com/PMBio/warpedLMM")
    (license license:asl2.0)))
4908
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.3." (substring commit 0 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; remove bundled Cython sources
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; The Python package lives two levels below the checkout root.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; With setuptools version 18.0 and later this setup.py hack causes
           ;; a build error, so we disable it.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (license license:bsd-3))))
4965
(define-public prank
  (package
    (name "prank")
    (version "170427")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32 "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; NOTE(review): the Makefile is addressed as "src/Makefile", i.e.
         ;; relative to the unpacked source root, so this phase presumably
         ;; runs before 'enter-src-dir -- confirm the resulting phase order
         ;; before reordering these two clauses.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1"))
                    ;; Colon-separated "bin" directories of the programs
                    ;; that are put on the wrapper's PATH.
                    (path (string-join
                           (map (lambda (input)
                                  (string-append
                                   (assoc-ref %build-inputs input) "/bin"))
                                '("mafft" "exonerate" "bppsuite"))
                           ":")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man)
               #t))))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (home-page "http://wasabiapp.org/software/prank/")
    (license license:gpl2+)))
5023
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "6.0.14")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove pre-built scripts
           (delete-file-recursively "src/BUILD/")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; There is no configure script, so we modify the Makefile directly.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" out "/bin\n")))
               #t)))
         (add-before 'install 'make-install-directory
           ;; The install directory is not created during 'make install'.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Wrap the main program and every installed ".pl" or ".py"
             ;; file so that they run with the build-time PATH prepended.
             (let* ((search-path (getenv "PATH"))
                    (out (assoc-ref outputs "out"))
                    (scripts (cons (string-append out "/bin/proteinortho")
                                   (find-files out "\\.(pl|py)$"))))
               (for-each (lambda (script)
                           (wrap-script script
                             `("PATH" ":" prefix (,search-path))))
                         scripts)
               #t))))))
    (native-inputs
     `(("which" ,which)))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrap-script
       ("diamond" ,diamond)
       ("perl" ,perl)
       ("python" ,python-wrapper)
       ("blast+" ,blast+)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (license license:gpl3+)))
5089
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; does not work with Python 3
       #:tests? #f)) ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; Keep the count in the introductory sentence in sync with the number of
    ;; @item entries in the list below.
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
5128
(define-public prodigal
  (package
    (name "prodigal")
    ;; Check for a new home page when updating this package:
    ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       ;; Pass the output's "bin" directory to make as INSTALLDIR.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (home-page "https://github.com/hyattpd/Prodigal")
    (license license:gpl3+)))
5161
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The source tree's "bin" and "lib" directories are
             ;; put in front of PATH and PERL5LIB first.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         ;; Print the name of each test file before running it.
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))
             #t))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every program whose name does not end in "R" with
               ;; PERL5LIB and PATH.  The names are taken from the "bin"
               ;; directory of the source tree and resolved under the output.
               (for-each (lambda (prog)
                           (let ((binary (string-append out "/" prog)))
                             (wrap-program binary
                               `("PERL5LIB" ":" prefix
                                 (,(string-append perl5lib ":" out
                                                  "/lib/perl5/site_perl"))))
                             (wrap-program binary
                               `("PATH" ":" prefix
                                 (,(string-append path ":" out "/bin"))))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets R_LIBS_SITE and a PATH made of
               ;; coreutils and the output's "bin" directory instead.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/"))))
                 (wrap-program file
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "https://sanger-pathogens.github.io/Roary/")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
5278
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         ;; The build is driven directly by the Makefile selected above.
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Provide a short "raxml" alias next to the real binary.
               ;; The link has to be created inside BIN; a bare "raxml"
               ;; link name would land in the build directory and never
               ;; reach the output.
               (symlink executable (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
5319
(define-public rsem
  (package
    (name "rsem")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled boost and samtools copies.
           (delete-file-recursively "boost")
           (delete-file-recursively "samtools-1.3")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Point the Makefile at the boost headers and at the htslib header
       ;; and static library taken from the build inputs.
       #:make-flags
       (let ((boost  (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "BOOST=" boost "/include/")
               (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
               (string-append "SAMLIBS=" htslib "/lib/libhts.a")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script; this phase only edits the
         ;; Makefile so that the bundled samtools library is not built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               (("^\\$\\(SAMLIBS\\).*") ""))
             #t))
         ;; Install every file matching "rsem-.*" into bin/ and the Perl
         ;; helper module into lib/perl5/site_perl.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((root    (assoc-ref outputs "out"))
                    (bindir  (string-append root "/bin/"))
                    (perldir (string-append root "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               (for-each (lambda (tool)
                           (install-file tool bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         ;; Wrap the listed programs so that PERL5LIB includes the directory
         ;; holding rsem_perl_utils.pm.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((root     (assoc-ref outputs "out"))
                    (perl5lib (string-append root "/lib/perl5/site_perl"))
                    (scripts  '("rsem-calculate-expression"
                                "rsem-control-fdr"
                                "rsem-generate-data-matrix"
                                "rsem-generate-ngvector"
                                "rsem-plot-transcript-wiggles"
                                "rsem-prepare-reference"
                                "rsem-run-ebseq"
                                "rsem-run-prsem-testing-procedure")))
               (for-each (lambda (script)
                           (wrap-program (string-append root "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perl5lib))))
                         scripts))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("htslib" ,htslib-1.3)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
5408
(define-public rseqc
  (package
    (name "rseqc")
    (version "3.0.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarball fetched through the SourceForge mirror list.
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32
         "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-cython" ,python-cython)
       ("python-bx-python" ,python-bx-python)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
5441
(define-public seek
  ;; There are no release tarballs.  According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         (let* ((extra-tools '("SeekMiner"
                               "SeekEvaluator"
                               "SeekPrep"
                               "Distancer"
                               "Data2DB"
                               "PCL2Bin"))
                ;; Run "make" with the argument list FLAGS inside each
                ;; "tools/<name>" directory listed in EXTRA-TOOLS.
                (make-extra-tools
                 (lambda (flags)
                   (for-each (lambda (tool)
                               (with-directory-excursion
                                   (string-append "tools/" tool)
                                 (apply invoke "make" flags)))
                             extra-tools))))
           (modify-phases %standard-phases
             ;; Replace the "/usr/bin/env ..." reference in "gen_tools_am"
             ;; with the perl found on PATH, then run "gen_auto" via bash.
             (replace 'bootstrap
               (lambda _
                 (substitute* "gen_tools_am"
                   (("/usr/bin/env.*") (which "perl")))
                 (invoke "bash" "gen_auto")
                 #t))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (make-extra-tools make-flags)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 ;; Same invocation as the build step, with the "install"
                 ;; target appended after the make flags.
                 (make-extra-tools (append make-flags '("install")))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
5514
(define-public samtools
  (package
    (name "samtools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test script calls out to /bin/bash; use the bash that
             ;; is found on PATH instead.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         ;; Also install the static library "libbam.a" into lib/.
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (libdir (string-append out "/lib")))
               (install-file "libbam.a" libdir)
               #t)))
         ;; Install the "*.h" files of the top-level source directory under
         ;; include/samtools/.
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (incdir   (string-append out "/include/samtools/"))
                    (header?  (lambda (name)
                                (string-match "\\.h$" name)))
                    (headers  (scandir "." header?)))
               (for-each (lambda (header)
                           (install-file header incdir))
                         headers)
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5576
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools.  The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags (list "LIBCURSES=-lncurses")
       ;; Reuse the arguments of 'samtools', adjusting only its phases.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:phases phases)
            `(modify-phases ,phases
               ;; The 'configure' and 'patch-tests' phases of the parent
               ;; package are not run for this version.
               (delete 'configure)
               (delete 'patch-tests)
               ;; Install only the "samtools" executable by hand.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out    (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t))))))))))
5607
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; There are no release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, just enter "src"; the later phases
           ;; run from there.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy the contents of "../bin" (relative to "src") into the
           ;; output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5655
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "2.9.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ngs.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0d5k5kabgl15as37kj9x65xc92j4gcqms86hvihw3yb6wag0r0q3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Everything after unpacking happens in the "ngs-sdk"
         ;; sub-directory of the checkout.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "ngs-sdk")
             #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix     (assoc-ref outputs "out"))
                   (build-root (string-append (getcwd) "/build")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so call it by hand with only
               ;; the two options below.
               (invoke "./configure"
                       (string-append "--build-prefix=" build-root)
                       (string-append "--prefix=" prefix))
               #t))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
5703
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Start from the arguments of 'ngs-sdk' with an explicit module list
     ;; prepended, and only swap the directory entered after unpacking.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
5721
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.9.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ncbi-vdb.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0knkj1sq34hlivgv5qd6jlczqrs3ldmfgn6vbbw7p4mqxvb9mirk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags '("HAVE_HDF5=1")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _
             (for-each make-file-writable (find-files "." ".*"))
             #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         ;; See https://github.com/ncbi/ncbi-vdb/issues/14
         (add-after 'unpack 'patch-krypto-flags
           (lambda _
             (substitute* "libs/krypto/Makefile"
               (("-Wa,-march=generic64\\+aes") "")
               (("-Wa,-march=generic64\\+sse4") ""))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out   (assoc-ref outputs "out"))
                   ;; Store directory of the input labelled LABEL.
                   (input (lambda (label)
                            (assoc-ref inputs label))))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (input "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Insert "-msse2" right after "CFLAGS =".
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (input "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (input "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (input "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (input "hdf5")))
               #t)))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir ilib)
               (copy-recursively
                (string-append "build/ncbi-vdb/linux/gcc/"
                               ,(system->linux-architecture
                                 (or (%current-target-system)
                                     (%current-system)))
                               "/rel/ilib")
                ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((kfg-dir (string-append (assoc-ref outputs "out")
                                           "/kfg")))
               (mkdir kfg-dir)
               (for-each (lambda (file)
                           (install-file file kfg-dir))
                         '("libs/kfg/default.kfg"
                           "libs/kfg/certs.kfg")))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5839
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the single "plink" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5892
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02npdwgkpfkdnhw819rhj5kw02a5k5m90b14zq9zzya4hyg929c0"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       ;; Build with "Makefile.std", passing the BLAS/LAPACK and zlib link
       ;; flags on the make command line.
       #:make-flags
       (list "BLASFLAGS=-llapack -lopenblas"
             "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
             "ZLIB=-lz"
             "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         ;; The build is run from the "1.9" sub-directory of the checkout.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (delete 'configure)            ; no "configure" script
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
5930
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Rewrite references to "sam.h" in the SAM wrapper sources to
           ;; "samtools/sam.h".
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           ;; Install the object files into lib/ and the "*.hpp" headers
           ;; into include/smithlab-cpp.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (libdir (string-append out "/lib"))
                      (incdir (string-append out "/include/smithlab-cpp"))
                      ;; Install every file below "." whose name matches
                      ;; REGEXP into DIRECTORY.
                      (install-matching
                       (lambda (regexp directory)
                         (for-each (lambda (file)
                                     (install-file file directory))
                                   (find-files "." regexp)))))
                 (mkdir-p libdir)
                 (mkdir-p incdir)
                 (install-matching "\\.o$" libdir)
                 (install-matching "\\.hpp$" incdir))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
5983
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/preseq/"
                           "releases/download/v" version
                           "/preseq_v" version ".tar.bz2"))
       (sha256
        (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled samtools.
           (delete-file-recursively "samtools")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Tell the Makefile where to install and where the samtools static
       ;; library and the smithlab-cpp files live.
       #:make-flags
       (let ((samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
6033
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Tests must be run after installation, as the "screed" command does
         ;; not exist right after building.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Put the freshly installed site-packages directory on
             ;; PYTHONPATH.  Let the build system compute the directory
             ;; instead of slicing the version out of the interpreter's
             ;; store file name, which gives a wrong path whenever the
             ;; version is not of the form "X.Y.Z" with single-digit
             ;; components (e.g. "2.7.17").
             (add-installed-pythonpath inputs outputs)
             ;; Make the installed "screed" command visible to the tests.
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out") "/bin:"
                                    (getenv "PATH")))
             (invoke "python" "setup.py" "test")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
6077
;; Variant of 'python-screed' obtained by passing it through
;; 'package-with-python2'.
(define-public python2-screed
  (package-with-python2 python-screed))
6080
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.9.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0vqzap68v81k0zif2mnqfy8pnw2nrhsg87p6mgq8qk3nk2jv2rgy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       ;; Point the build at the configuration files and at the library
       ;; directory of the ncbi-vdb input; the latter is "lib64" for x86_64
       ;; systems and "lib32" otherwise.
       #:make-flags
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               (string-append "VDB_LIBDIR=" vdb
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out   (assoc-ref outputs "out"))
                   ;; Store directory of the input labelled LABEL.
                   (input (lambda (label)
                            (assoc-ref inputs label))))
               ;; The build system expects a directory containing the sources
               ;; and raw build output of ncbi-vdb, including files that are
               ;; not installed.  Since we are building against an installed
               ;; version of ncbi-vdb, the following modifications are needed.
               (substitute* "setup/konfigure.perl"
                 ;; Make the configure script look for the "ilib" directory of
                 ;; "ncbi-vdb" without first checking for the existence of a
                 ;; matching library in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Look for interface libraries in ncbi-vdb's "ilib"
                 ;; directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Dynamic linking
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (input "fuse"))
                       (string-append "--with-magic-prefix="
                                      (input "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources="
                                      (input "ncbi-vdb"))
                       (string-append "--with-ncbi-vdb-build="
                                      (input "ncbi-vdb"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (input "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (input "hdf5")))
               #t))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page
     "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
6180
;; SeqAn 2.x.  Nothing is compiled: the release tarball is unpacked and its
;; "include" and "share" directories are copied into separate outputs.
(define-public seqan
  (package
    (name "seqan")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/seqan/seqan/releases/"
                                  "download/seqan-v" version
                                  "/seqan-library-" version ".tar.xz"))
              (sha256
               (base32
                "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so the
    ;; two are kept in different outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         ;; Small helpers to look up store paths by input/output label.
         (let ((input  (lambda (label) (assoc-ref %build-inputs label)))
               (output (lambda (label) (assoc-ref %outputs label))))
           ;; The trivial build system provides no PATH; tar needs xz to
           ;; decompress the archive.
           (setenv "PATH" (string-append (input "tar") "/bin:"
                                         (input "xz") "/bin"))
           (invoke "tar" "xvf" (input "source"))
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", everything under "share" goes to "doc".
           (copy-recursively "include"
                             (string-append (output "out") "/include"))
           (copy-recursively "share"
                             (string-append (output "doc") "/share"))
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
6225
;; SeqAn 1.x, kept alongside `seqan'.  Same unpack-and-copy recipe as the
;; parent package, but the tarball is bzip2-compressed and fetched from a
;; different location.
(define-public seqan-1
  (package (inherit seqan)
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so the
    ;; two are kept in different outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         ;; Small helpers to look up store paths by input/output label.
         (let ((input  (lambda (label) (assoc-ref %build-inputs label)))
               (output (lambda (label) (assoc-ref %outputs label))))
           ;; The trivial build system provides no PATH; tar needs bzip2 to
           ;; decompress the archive.
           (setenv "PATH" (string-append (input "tar") "/bin:"
                                         (input "bzip2") "/bin"))
           (invoke "tar" "xvf" (input "source"))
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", everything under "share" goes to "doc".
           (copy-recursively "include"
                             (string-append (output "out") "/include"))
           (copy-recursively "share"
                             (string-append (output "doc") "/share"))
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))))
6260
;; Seqmagick, fetched from PyPI and built with the Python build system.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "seqmagick" version))
              (sha256
               (base32
                "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (inputs
     `(("python-biopython" ,python-biopython)))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
6286
;; Seqtk, built from the upstream git tag "v<version>".
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/seqtk.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The configure phase is skipped entirely.
         (delete 'configure)
         ;; There is no test suite, so the freshly built binary is simply
         ;; invoked once as a sanity check.
         (replace 'check
           (lambda _
             (invoke "./seqtk" "seq")
             #t))
         ;; Install the single executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "seqtk"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
6322
;; SNAP short-read aligner, built from the upstream git tag "v<version>".
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/amplab/snap.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The configure phase is skipped entirely.
         (delete 'configure)
         ;; Run the unit test executable produced by the build.
         (replace 'check (lambda _ (invoke "./unit_tests") #t))
         ;; Install the two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library used by the resulting programs, not a build-time
    ;; tool, so it is a regular input rather than a native input.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
6363
;; SortMeRNA, built from the upstream git tag named after the version.
;; Executables and the bundled rRNA FASTA databases go to separate outputs.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biocore/sortmerna.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
    (build-system gnu-build-system)
    (outputs '("out"      ;for binaries
               "db"))     ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                   (dbdir  (string-append (assoc-ref outputs "db")
                                          "/share/sortmerna/rRNA_databases")))
               ;; The two programs end up in the "out" output...
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               ;; ...and every FASTA file found below "rRNA_databases" in
               ;; the "db" output.
               (for-each (lambda (fasta)
                           (install-file fasta dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6410
;; STAR RNA-seq aligner, built from the upstream git tag named after the
;; version.  Pre-built binaries and the bundled htslib are removed from the
;; source; the build uses the htslib input instead.
(define-public star
  (package
    (name "star")
    (version "2.7.3a")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/alexdobin/STAR.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Do not rely on an absolute path to "rm".
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (for-each delete-file-recursively
                            '("bin/MacOSX_x86_64"
                              "bin/Linux_x86_64"
                              "bin/Linux_x86_64_static"
                              "source/htslib"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         ;; Everything that is built lives in the "source" sub-directory.
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         ;; Replace the value of COMPILATION_TIME_PLACE in the Makefile with
         ;; a fixed string.
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             #t))
         ;; See https://github.com/alexdobin/STAR/pull/562
         (add-after 'enter-source-dir 'add-missing-header
           (lambda _
             (substitute* "SoloReadFeature_inputRecords.cpp"
               (("#include \"binarySearch2.h\"" h)
                (string-append h "\n#include <math.h>")))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the Depend.list target.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of bundled headers into system includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot before "h" is escaped so that it only matches a
             ;; literal ".h" suffix, like the pattern above.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         ;; Install the single executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
6498
;; Variant of `star' that builds and installs the "STARlong" make target.
(define-public starlong
  (package (inherit star)
    (name "starlong")
    (arguments
     (substitute-keyword-arguments (package-arguments star)
       ;; The inherited make flags are discarded; only the target changes.
       ((#:make-flags inherited-flags)
        `(list "STARlong"))
       ((#:phases inherited-phases)
        `(modify-phases ,inherited-phases
           ;; Allow extra long sequence reads.
           (add-after 'unpack 'make-extra-long
             (lambda _
               (substitute* "source/IncludeDefine.h"
                 (("(#define DEF_readNameLengthMax ).*" _ definition)
                  (string-append definition "900000\n")))
               #t))
           ;; Install the differently named executable.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "STARlong"
                             (string-append (assoc-ref outputs "out")
                                            "/bin/"))
               #t))))))))
6520
;; Subread, built from the SourceForge source tarball using the upstream
;; "Makefile.Linux" in the "src" directory.
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (let* ((system ,(or (%current-target-system)
                           (%current-system)))
              ;; Flags used on every architecture.
              (common '("-ggdb" "-fomit-frame-pointer"
                        "-ffast-math" "-funroll-loops"
                        "-fmessage-length=0"
                        "-O9" "-Wall" "-DMAKE_FOR_EXON"
                        "-DMAKE_STANDALONE"
                        "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
              ;; Flags that are only appended for x86_64.
              (x86-64-only '("-mmmx" "-msse" "-msse2" "-msse3"))
              (ccflags (if (string-prefix? "x86_64" system)
                           (append common x86-64-only)
                           common)))
         (list (string-append "CCFLAGS=" (string-join ccflags))
               "-f" "Makefile.Linux"
               "CC=gcc ${CCFLAGS}"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         ;; Copy the contents of the top-level "bin" directory, one level
         ;; above "src", into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p target)
               (copy-recursively "../bin" target))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
6575
;; StringTie.  The bundled samtools copy is deleted from the source and the
;; build is pointed at the samtools input instead.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                  "stringtie-" version ".tar.gz"))
              (sha256
               (base32
                "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; The "stringtie" target no longer depends on a libbam.a
             ;; from the deleted bundled copy.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the headers from the "samtools/" include directory.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ stem)
                (string-append "#include <samtools/" stem ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         ;; Install the single executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "stringtie"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
6637
;; Taxtastic, built from the upstream git tag "v<version>" so that the test
;; suite is available.
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.11")
    (source (origin
              ;; The Pypi version does not include tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/fhcrc/taxtastic.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'prepare-directory
           (lambda _
             ;; The git checkout must be writable for tests.
             (for-each make-file-writable (find-files "."))
             ;; This test fails, but the error is not caught by the test
             ;; framework, so the tests fail...
             (substitute* "tests/test_taxit.py"
               (("self.cmd_fails\\(''\\)")
                "self.cmd_fails('nothing')"))
             ;; This version file is expected to be created with git describe.
             (mkdir-p "taxtastic/data")
             (call-with-output-file "taxtastic/data/ver"
               (lambda (port) (display ,version port)))
             #t))
         (add-after 'unpack 'python37-compatibility
           (lambda _
             ;; Import "errno" directly and use it in place of "os.errno".
             (substitute* "taxtastic/utils.py"
               (("import csv") "import csv, errno")
               (("os.errno") "errno"))
             #t))
         (replace 'check
           ;; Note, this fails to run with "-v" as it tries to write to a
           ;; closed output stream.
           (lambda _
             (invoke "python" "-m" "unittest")
             #t)))))
    (propagated-inputs
     `(("python-sqlalchemy" ,python-sqlalchemy)
       ("python-decorator" ,python-decorator)
       ("python-biopython" ,python-biopython)
       ("python-pandas" ,python-pandas)
       ("python-psycopg2" ,python-psycopg2)
       ("python-fastalite" ,python-fastalite)
       ("python-pyyaml" ,python-pyyaml)
       ("python-six" ,python-six)
       ("python-jinja2" ,python-jinja2)
       ("python-dendropy" ,python-dendropy)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
6698
;; VCFtools, built from the upstream release tarball.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.16")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/vcftools/vcftools/releases/download/v"
                    version "/vcftools-" version ".tar.gz"))
              (sha256
               (base32
                "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       ;; PREFIX and MANDIR both point into the "out" output.
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
6734
;; Infernal, built from the upstream release tarball with the default GNU
;; build phases.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://eddylab.org/software/infernal/"
                                  "infernal-" version ".tar.gz"))
              (sha256
               (base32
                "0pm8bm3s6nfa0av4x6m6h27lsg12b3lz3jm0fyh1mc77l2isd61v"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)
       ("python" ,python)))             ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.3 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
6763
;; SCDE R package, built from the upstream git tag named after the version.
(define-public r-scde
  (package
    (name "r-scde")
    (version "1.99.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/hms-dbmi/scde.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-mgcv" ,r-mgcv)
       ("r-rook" ,r-rook)
       ("r-rjson" ,r-rjson)
       ("r-cairo" ,r-cairo)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-edger" ,r-edger)
       ("r-quantreg" ,r-quantreg)
       ("r-nnet" ,r-nnet)
       ("r-rmtstat" ,r-rmtstat)
       ("r-extremes" ,r-extremes)
       ("r-pcamethods" ,r-pcamethods)
       ("r-biocparallel" ,r-biocparallel)
       ("r-flexmix" ,r-flexmix)))
    (home-page "https://hms-dbmi.github.io/scde/")
    (synopsis "R package for analyzing single-cell RNA-seq data")
    (description "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
    ;; See https://github.com/hms-dbmi/scde/issues/38
    (license license:gpl2)))
6805
;; CENTIPEDE R package, fetched as a source tarball from R-Forge.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://download.r-forge.r-project.org/"
                                  "src/contrib/CENTIPEDE_" version ".tar.gz"))
              (sha256
               (base32
                "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+ license:gpl3+))))
6829
;; genefilter R package from Bioconductor.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.68.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "1xjr02qhldspjwd6y374wgik18fgywb6408wsz471i8b4ik98ckc"))))
    (build-system r-build-system)
    ;; A Fortran compiler is made available at build time.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-survival" ,r-survival)))
    (home-page "https://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
6856
;; DESeq2 R package from Bioconductor.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq2" version))
              (sha256
               (base32
                "1lmbhznfs8dz9ipd53z4ccwvwxqwzx1ayw56jlrvlsambaj8fash"))))
    ;; The upstream name differs from the Guix package name.
    (properties '((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
6893
;; DEXSeq R package from Bioconductor.
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DEXSeq" version))
              (sha256
               (base32
                "0sqqb65ckliif2nmvlvc7w49id59z0nvqcdz5gry8l2mn6azrf6a"))))
    ;; The upstream name differs from the Guix package name.
    (properties '((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allows the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (license license:gpl3+)))
6936
;; AnnotationForge R package from Bioconductor.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "0h2r60v339ajk5r6xq4iwwcpihdvf12fi8255byr2dhglzrd8xl7"))))
    ;; The upstream name differs from the Guix package name.
    (properties '((upstream-name . "AnnotationForge")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
6966
;; RBGL R package from Bioconductor.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.62.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "0ixbkdirf08i400db587r262j52sdwdgk3884nxk5xkbhx2m6vg0"))))
    ;; The upstream name differs from the Guix package name.
    (properties '((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bh" ,r-bh)
       ("r-graph" ,r-graph)))
    (home-page "https://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
6989
;; GSEABase R package from Bioconductor.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.48.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "0kcb90sxlf19d5dxhdbqk9x62svky4262cccl0wxarbq6gf3bd57"))))
    ;; The upstream name differs from the Guix package name.
    (properties '((upstream-name . "GSEABase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
7016
;; R package Category: tools for category analysis.
(define-public r-category
  (package
    (name "r-category")
    (version "2.52.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Category" version))
       (sha256
        (base32 "0ga0ij9hyzsxa5pavwmv35i8xggia2wygrk4m4z8an0qcvgy3v5g"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "Category")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (home-page "https://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
7047
;; R package GOstats: tools for working with GO and microarray data.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GOstats" version))
       (sha256
        (base32 "19f4gxm3sbprqrnwzvskvywv6j4ibm8xkrbgg4h0fvh2b5331nwc"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "GOstats")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (home-page "https://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
7078
;; R package ShortRead: FASTQ input, sampling and manipulation.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.44.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ShortRead" version))
       (sha256
        (base32 "0ykyrj4g6vc67d5s46sp4659qvar2iavflzhggm79w4p50hxia4s"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "ShortRead")))
    (build-system r-build-system)
    ;; zlib is a plain (non-propagated) input; the R dependencies below are
    ;; propagated.
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
7120
;; R package systemPipeR: NGS workflow and reporting environment.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "systemPipeR" version))
       (sha256
        (base32 "17r25v9wcglyma2v1c8fka80dm7fx86saxlsb2aprmwi4h3dhb0j"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "systemPipeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-batchtools" ,r-batchtools)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-yaml" ,r-yaml)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
7167
;; R package groHMM: a GRO-seq analysis pipeline.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "groHMM" version))
       (sha256
        (base32 "0ywr8f6bfhg2ia3n4rmsxr4v1xqmlyhkwh0rvkgh3yqgnbvihndg"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "groHMM")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (license license:gpl3+)))
7194
;; VSEARCH, built from a tagged Git checkout with the bundled cityhash
;; sources removed in favour of the cityhash input.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/torognes/vsearch.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Drop the bundled cityhash sources; the patch above adjusts the
           ;; vsearch source accordingly.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; The non-portable SSE intrinsics used by vsearch make the build fail on
    ;; other platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; public domain source is included as well.
    (license (list license:gpl3 license:bsd-2))))
7240
;; ParDRe, an MPI-based tool for removing duplicate reads.
(define-public pardre
  (package
    (name "pardre")
    ;; Upstream changed the 1.1.5 tarball in place, hence the "-1" suffix on
    ;; the version while the URI still refers to plain "1.1.5".
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           "1.1.5" ".tar.gz"))
       (sha256
        (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the "ParDRe" executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "ParDRe" bin)
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
7281
;; Ruby gem bio-kseq: bindings to the kseq.h FASTA/FASTQ parser.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments
     `(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
7309
;; Ruby gem bio-locus: fast querying of genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
7333
;; Ruby gem bio-blastxmlparser: BLAST XML parser and command line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-blastxmlparser" version))
       (sha256
        (base32 "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    ;; RSpec is listed as a native input, matching the sibling bio-locus
    ;; package.
    ;; NOTE(review): assumes RSpec is only needed to run the test suite at
    ;; build time -- confirm against the gem's gemspec.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (license license:expat)))
7357
;; BioRuby, installed from the "bio" gem.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32 "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the command names and absolute paths used by the command
         ;; tests with the locations found on PATH in the build environment.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; The clauses are applied in order to each line, which is
             ;; equivalent to running one substitution pass per pattern.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; No comma between the file names: inside a quoted list a
             ;; comma is read as `unquote', not as a separator.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (home-page "http://bioruby.org/")
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    ;; NOTE(review): the comment mentions GPL2 but only gpl2+ is listed;
    ;; confirm whether any sample is GPL2-only.
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
7400
;; R package biocViews: categorization helper for Bioconductor repositories.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32 "0yn8jys1900d31haayz0ppqk5y79mwjajwp4alz6pln3dbs70f3g"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "biocViews")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocmanager" ,r-biocmanager)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (home-page "https://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
7428
;; R package BiocStyle: formatting styles for Bioconductor documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.14.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32 "1x71in059zql40f4c87bd1gf96r945kdvwbq61jmch9d3d8nwxbb"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "BiocStyle")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocmanager" ,r-biocmanager)
       ("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
7454
;; R package BiocCheck, with its helper scripts also linked into bin/.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocCheck" version))
       (sha256
        (base32 "1qnvl5yajgh67ijkq6gdsafri1k5hyw5gzn2ccqk9ymx6i2xd80g"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>.  This phase
         ;; makes sure the latter works.  For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the file name is appended, which avoids "//" in the
                    ;; symlink targets.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link both scripts from the installed R library into bin/.
               (for-each
                (lambda (script)
                  (symlink (string-append script-dir "/" script)
                           (string-append dest-dir "/" script)))
                '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-knitr" ,r-knitr)
       ("r-optparse" ,r-optparse)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7501
;; R package S4Vectors: S4 implementation of vectors and lists.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.24.4")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32 "1fzs8j2d3wwfzm2fq63ywf68a4dbggyl5l098f148yn4jw7jd3bc"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "S4Vectors")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
7528
;; R package IRanges: infrastructure for intervals on sequences.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.20.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32 "1jhnxb9yacmj2z82b6992gihjvj1a0gnjwbjiagyyx03fqnv23kg"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "IRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
7557
;; R data package GenomeInfoDbData: lookup tables used by GenomeInfoDb.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri cannot be used here: this tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32 "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "GenomeInfoDbData")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
7580
;; R package GenomeInfoDb: utilities for chromosome identifiers.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.22.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32 "0phadr67yb4l25x41a9wg4pjy1wbxlk14jhidhz6g5n4z6x45qbm"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "GenomeInfoDb")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
7610
;; R package edgeR: empirical analysis of digital gene expression data.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.28.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "edgeR" version))
       (sha256
        (base32 "07dv99kl7jfa62nzcq705r56gcpb1hq7p4px48j71y2ddi1rqmr6"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; The fifth line used to read "It be applied"; the missing "can" has
    ;; been restored.
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
7638
;; R package VariantAnnotation: annotation of genetic variants.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "009s7rzp78s2w6iybizina42qx2w8qv3xwjbkpqphmm451maykgs"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "VariantAnnotation")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
7677
;; R package limma: linear models for microarray and RNA-seq data.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.42.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "1nd01r7rd7jb5qz84vbgfnyrmgm9wiq7fsdji68537kjgvrzmm9z"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
7696
;; R package XVector, with its zlibbioc references removed at build time.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "XVector" version))
       (sha256
        (base32 "0s2gg84yzl5ffkzp6n7kh0jjk1vd90z189f5hkfn18rn67cy2nv7"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata;
         ;; zlib is provided as an input below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    ;; Typo fix: the synopsis used to read "manpulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7732
;; R package GenomicRanges: containers for genomic intervals.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicRanges" version))
       (sha256
        (base32 "0xdds6ws7jjdfy4x3mb0qhy43kpxdmifmpw0jdk4wgw999zabb50"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The first sentence was reworded: it previously fused two clauses
    ;; ("provides tools to ... is playing a central role").
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, which play a central role when it comes to
analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7761
;; R package Biobase: base functions for Bioconductor.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "1gx41083dqlm59vwqdxvc4ny31x91j48mda9n3scg0f2zwasvqgl"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
7783
;; R package AnnotationDbi: annotation database interface.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "09piz1f0xpbb4amskx4ilby6lfrn27hhwk75il2c4lq6m3hr4w8s"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7810
;; R package biomaRt: interface to BioMart databases.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.42.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biomaRt" version))
       (sha256
        (base32 "0676s8aq9xj2pdrfk28kf5j69fmssn900k4vxrp11ghwjr8z24h7"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-httr" ,r-httr)
       ("r-openssl" ,r-openssl)
       ("r-progress" ,r-progress)
       ("r-rappdirs" ,r-rappdirs)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second; the
    ;; two arguments were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7847
;; R package BiocParallel: parallel evaluation facilities for Bioconductor.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.20.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "0g0znb4whsvb9hpwx9xaasdi5n4vjqw8cpdyqgrdrjm91rls1h21"))))
    ;; The upstream name differs in case from the Guix package name.
    (properties '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
7872
;; Bioconductor package "Biostrings", built with the R build system.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "0pq7g2hflx2cjlpwdj6mscw9hnxvlf5y50dxf48lbrf9r3q9kmyp"))))
    ;; The upstream name is capitalized, unlike the Guix package name.
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
7898
;; Bioconductor package "Rsamtools".  A custom phase strips the zlibbioc
;; references from the package metadata before the build.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "2.2.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "1vj43acawqqkf9yval9fzxarpsf04fmn78m2hq4f083w1k0myhyr"))))
    (properties '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Drop "zlibbioc" from DESCRIPTION and its import from NAMESPACE;
         ;; zlib itself is listed among the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rhtslib" ,r-rhtslib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page
     "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the @code{samtools},
@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence
Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed
tab-delimited (tabix) files.")
    (license license:expat)))
7943
;; Bioconductor package "DelayedArray", built with the R build system.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.12.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "02i88ll2d7r83nk0wdj28akvsz3jq19g6ixpaahfy3jy5av4byv6"))))
    (properties '((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
7977
;; Bioconductor package "SummarizedExperiment", built with the R build system.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "1z9bdk49dajafkfvv99nv6zyn6v70iyyy2jgdp5w5z8174a2bnn1"))))
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
8008
;; Bioconductor package "GenomicAlignments", built with the R build system.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.22.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "065xvy4pkda0ajvl1b75iski95k1pnbhxwdq7vkfl8v55915vqh6"))))
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
8041
;; Bioconductor package "rtracklayer".  A custom phase strips the zlibbioc
;; references from the package metadata before the build.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.46.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "0lp9xsm8kqrgpwgwj7qaqcv1z6qynbz084grlpwp94zsp2ppf0n6"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Drop "zlibbioc" from DESCRIPTION and its import from NAMESPACE;
         ;; zlib itself is listed among the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
8088
;; Bioconductor package "GenomicFeatures", built with the R build system.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.38.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "0dd226kgks50jdx5w35f3wmg95hy8aibi4kcn8p5kmqp5i8j580b"))))
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
8129
;; Bioconductor annotation package "GO.db".  The source URI is spelled out
;; because the tarball is under "release/data/annotation/src/contrib/".
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "0i3wcf5h3n0dawzc1hy0kv74f06j80c47n4p3g3fmrcxlhi3jpa5"))))
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
8153
;; Bioconductor package "topGO", built with the R build system.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.38.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "1kw9m2j67895k58lx9msc248pjwblp8clxwgsl01cql7sgi1xzlf"))))
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Upstream permits any version of the LGPL.
    (license license:lgpl2.1+)))
8186
;; Bioconductor package "BSgenome", built with the R build system.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.54.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "0nn1b3h4hmnx5whf2cmzmyxdrcf4myj8c38rwr0sw4rc07xfzndy"))))
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
8216
;; Bioconductor package "impute"; gfortran is a build-time (native) input.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.60.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "0igz1phjd1j9bg9z4kyy7j8v9bxi9sdwz4df26r51i2vavlbrf4q"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
8236
;; Bioconductor package "seqPattern", built with the R build system.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "1gxrq6s2hiyac69idh5r1nbr1s69n0hg4ap2skm4g6857av9pwqf"))))
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
8263
;; Bioconductor package "genomation", built with the R build system.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1sba928h23b67gr3i4yj1bg655g24l3bcgsf5gvymzrv5idrss1l"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    ;; The opening sentence used to read "This package provides a package
    ;; for ..."; it now names what is actually provided.
    (description
     "This package provides tools for the summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
8309
;; Bioconductor experiment-data package "genomationData".
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/experiment/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/experiment/src/contrib/"
                           "genomationData_" version ".tar.gz"))
       (sha256
        (base32 "10xyb8akjrhmak2i0mnv1agny2ipy364q9nlibyplpzc7vdb6bw7"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8337
;; Bioconductor package "seqLogo", built with the R build system.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.52.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "0s94aahp8ma1crmp83dz65ifjwrx6wqi3q6005lmbp8yk2x1rkj4"))))
    (properties
     '((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8358
;; Bioconductor package "motifRG", built with the R build system.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32
                "0s6wdr036lra9x93r9k8wvicbkgzypjh3jp46h92yacw8d829k0d"))))
    (properties
     '((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8385
;; CRAN package "qtl" (R/qtl), fetched from the CRAN mirror.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.46-2")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://cran/src/contrib/qtl_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0rbwcnvyy96gq1dsgpxx03pv423qya26h6ws5y0blj3blfdmj83a"))))
    (build-system r-build-system)
    (home-page "https://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    ;; The second paragraph previously mixed a finite verb with two
    ;; infinitives ("estimates ..., to identify ..., and to perform ...");
    ;; it is now a single parallel list.
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

R/qtl uses a hidden Markov model to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
8410
;; R/qtl2, fetched from its Git repository at the tag named by VERSION.
(define-public r-qtl2
  (package
    (name "r-qtl2")
    (version "0.20")
    (source
     (origin
       ;; Not yet available in cran.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/rqtl/qtl2.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0l1asr28q25jzbwrbg5490962sg3y4sjrd0qf09p78ws1aq8vfs0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-jsonlite" ,r-jsonlite)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rsqlite" ,r-rsqlite)
       ("r-yaml" ,r-yaml)))
    (home-page "https://kbroman.org/qtl2/")
    (synopsis
     "QTL analysis software for high-dimensional data and complex cross designs")
    (description
     "R/qtl2 (aka qtl2) is a reimplementation of the QTL analysis software
R/qtl, to better handle high-dimensional data and complex cross designs.")
    (license license:gpl3)))
8439
;; Bioconductor package "zlibbioc", built with the R build system.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "1xh7qan0w62mzsmanbx9vcj6ygdfhzw1abaxijkq7f4nh5w87idj"))))
    (properties '((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description
     "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8458
;; R4RNA, distributed as a tarball from the e-rna.org site.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       ;; Fetch over HTTPS, matching the scheme the home page already uses
       ;; for the same host.
       (uri (string-append "https://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8481
;; Bioconductor package "Rhtslib", built with staged installation disabled.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.18.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "0gkbrmrcg55c9s5166ifljlx0v25rv4ijdyp4wf4c292xd6chy2l"))))
    (properties
     '((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    ;; With a staged install, a temporary directory gets recorded in the
    ;; Rhtslib.so binary and R aborts the build.
    (arguments '(#:configure-flags '("--no-staged-install")))
    (propagated-inputs
     `(("curl" ,curl)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8512
;; Bioconductor package "bamsignals", built with the R build system.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "0699b0pqbs0dvs91yjibcjc90lxj9mg8rcml4a6wchfr9md7n74w"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package efficiently obtains count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8542
;; Bioconductor package "RCAS"; pandoc is propagated alongside the R
;; dependencies.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32 "1s3gvvxi1029d1vfwnjh21nnw3mlx08kcwz63891hml9y850cvsn"))))
    (properties
     '((upstream-name . "RCAS")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-knitr" ,r-knitr)
       ("r-motifrg" ,r-motifrg)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("r-topgo" ,r-topgo)
       ("pandoc" ,ghc-pandoc)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8592
;; Web front end for RCAS, built with the GNU build system.  Two extra
;; phases patch the configure script and wrap the installed executable.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'find-RCAS
           ;; The configure script checks the RCAS version with an R
           ;; expression such as ‘1.10.1 >= 1.3.4’, which evaluates to false,
           ;; so non-1.3.x versions are not found.  Replace the threshold
           ;; "1.3.4" with "0.0.0".
           (lambda _
             (substitute* "configure"
               (("1\\.3\\.4") "0.0.0"))
             #t))
         (add-after 'install 'wrap-executable
           ;; Wrap bin/rcas-web so that it runs with the Guile load paths of
           ;; guile-json and guile-redis and with the build-time R_LIBS_SITE.
           ;; NOTE(review): GUILE_LOAD_COMPILED_PATH receives the same
           ;; share/guile/site/2.2 directories as GUILE_LOAD_PATH -- confirm
           ;; that this is where the compiled files are installed.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (json (assoc-ref inputs "guile-json"))
                    (redis (assoc-ref inputs "guile-redis"))
                    (path (string-append
                           json "/share/guile/site/2.2:"
                           redis "/share/guile/site/2.2")))
               (wrap-program (string-append out "/bin/rcas-web")
                 `("GUILE_LOAD_PATH" ":" = (,path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile-next" ,guile-2.2)
       ("guile-json" ,guile-json-1)
       ("guile-redis" ,guile-redis)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description
     "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8643
;; Bioconductor package "MutationalPatterns", built with the R build system.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "08715l6swrlccviw7932v5hyrd2x4c6049vy9qnxk0lw3sp1zvsf"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The next two genome data packages are only suggested upstream.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description
     "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8681
;; ChIPKernels, built from a pinned Git commit.  The version string is
;; "1.1-" followed by the revision and the first nine characters of the
;; commit hash.
(define-public r-chipkernels
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels.git")
               (commit commit)))
         ;; Name the checkout with git-file-name, as the other git-fetch
         ;; packages in this file do, rather than assembling it by hand.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8717
;; SeqGL, fetched from its Git repository at the tag named by VERSION.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ManuSetty/SeqGL.git")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description
     "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8748
;; tximport, fetched from Bioconductor.  knitr is only a native (build-time)
;; input.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.14.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32
         "1avy0zhgnszmg0dr9w74yq9ml10kwdrrgcni2wysrd48zzskc1n0"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    ;; The offset described below is meant for *differential* expression
    ;; analysis; the previous wording "different expression" was a typo.
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for differential expression
of gene-level counts.")
    (license license:gpl2+)))
8772
;; rhdf5, fetched from Bioconductor.  zlib is a plain input, while
;; r-rhdf5lib is propagated.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rhdf5" version))
       (sha256
        (base32
         "18pv74jj4wr1981r92ss10qkgf5g1b09dsbz3im3j70a4l5l0df0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    ;; Grammar fix: "other software packages" (plural) instead of
    ;; "other software package".
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
packages, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8799
;; AnnotationFilter, fetched from Bioconductor.  The upstream name is
;; recorded in the properties because it differs in case from the package
;; name used here.
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32
         "1l9sxhlvnwn6327vgg02h11ppmqr2zr07ff8wmcng0i1jbqwa8q5"))))
    (properties `((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8823
;; EMBOSS, built with the GNU build system.  The tarball lives in a
;; directory named after the major+minor version followed by ".0".  The build
;; patches configure.in and Makefile.am and then regenerates the build system
;; with autoreconf, which is why the autotools are native inputs.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32
         "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; CHECK_PNGDRIVER looks for libgd, libpng and zlib but expects
             ;; all three under one common prefix.  Replace the macro call
             ;; with the settings it would otherwise produce.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; There is no Internet access at build time, so the update
             ;; lookup cannot work; drop the embossupdate invocation.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; The two phases above edit autotools input files, so the build
         ;; system has to be regenerated afterwards.
         (add-after 'disable-update-check 'autogen
           (lambda _ (invoke "autoreconf" "-vif") #t)))))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8886
;; BITS, built from a pinned git commit.  There is no configure script and
;; no test suite; installation simply copies the "bin" directory to the
;; output.
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; The version is 2.13.0 even though no release archives have been
      ;; published as yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/arq5x/bits.git")
                      (commit commit)))
                ;; Use the shared helper rather than assembling the
                ;; "NAME-VERSION-checkout" string by hand, like the other
                ;; git-fetch origins in this file.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Blank out every Makefile line mentioning a "_cuda" target
               ;; and remove the line continuation that follows
               ;; "bits_test_intersections".
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (copy-recursively
                "bin" (string-append (assoc-ref outputs "out") "/bin"))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8932
(define-public piranha
  ;; The latest version has no release tarball, so a specific commit is
  ;; pinned.  When this was written that commit was more than a year old.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the "smithlab-cpp" input into
           ;; ./src/smithlab_cpp/ of the unpacked source tree.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (for-each (lambda (file)
                           (install-file file "./src/smithlab_cpp/"))
                         (find-files (assoc-ref inputs "smithlab-cpp")))
               #t))
           ;; After the regular install phase, copy everything found under
           ;; the local "bin" directory into the output's bin directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files "bin" ".*")))
               #t)))
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools") "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Source checkout of smithlab_cpp at a pinned commit; it is
         ;; consumed by the 'copy-smithlab-cpp phase above.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp.git")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
8999
;; PePr, fetched from PyPI and built with Python 2.  No tests are run
;; because the package includes none.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "PePr" version))
       (sha256
        (base32
         "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    ;; Grammar fix: "It uses ... and looks for" (subject-verb agreement)
    ;; instead of "and look for".
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and looks for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
9027
;; filevercmp, built from a pinned git commit.  The version string is "0-1."
;; followed by the first seven characters of the commit.  There is no
;; configure script and no test suite; the single "filevercmp" file is
;; installed into the output's bin directory.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/filevercmp.git")
                      (commit commit)))
                ;; Name the checkout after the package version, as the other
                ;; git-fetch origins in this file do, rather than after the
                ;; full 40-character commit hash.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
9058
;; MultiQC, fetched from PyPI.  The only customization is a phase that
;; strips the version constraint on matplotlib from setup.py.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "multiqc" version))
              (sha256
               (base32
                "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; MultiQC 1.5 declares a requirement on a python-matplotlib
               ;; older than the one packaged in Guix, yet it should work
               ;; fine with 2.2.2.  For details see
               ;; <https://github.com/ewels/MultiQC/issues/725> and
               ;; <https://github.com/ewels/MultiQC/issues/732>.
               (("['\"]matplotlib.*?['\"]")
                "'matplotlib'"))
             #t)))))
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (home-page "https://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9104
;; chipseq, fetched from Bioconductor; all of its dependencies are
;; propagated R packages.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "1ln6bn08xig3j6ryak1xfkjhvpnlm2vf1czz9hlj6f02299nbs6l"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9130
;; CopyhelpeR.  The download URL is assembled by hand and points into the
;; "data/experiment" area of the Bioconductor "release" tree.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://bioconductor.org/packages/release/"
                                  "data/experiment/src/contrib/CopyhelpeR_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9156
;; CopywriteR, fetched from Bioconductor.  Among its propagated inputs is
;; r-copyhelper, which carries the helper files it needs.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "0llg1zpxg7qnvja5f5w1z1xic0jdg6zc4mfn97h2sm44skxxcyl1"))))
    (properties `((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9196
;; methylKit, fetched from Bioconductor.  zlib is a plain input, knitr is
;; needed only at build time, and the remaining R packages are propagated.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32
         "0klwc0sbmrxj1lxbz16pl39rxjm0pi57gjw547hlgnac1p9fspzy"))))
    (properties `((upstream-name . "methylKit")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-mgcv" ,r-mgcv)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9245
;; sva, fetched from Bioconductor; all dependencies are propagated R
;; packages.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "sva" version))
              (sha256
               (base32
                "1bzms6idx30s4nxl610zwa8rjxsyxb5pf3vxsdfmxg8j4pab9gh1"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    ;; Grammar fix: "high-throughput experiments" (plural) instead of
    ;; "high-throughput experiment".
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiments. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9275
;; seqminer, fetched from CRAN; zlib is its only declared input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "8.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "seqminer" version))
              (sha256
               (base32
                "00jzj8mwb0zaiwlifd41b26mrq9mzigj18nc29dydi0r42hxg16i"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; The package may be used under any version of the GPL.
    (license (list license:gpl2+ license:gpl3+))))
9297
;; RareMETALS2.  The tarball is downloaded from a hand-assembled URL on
;; genome.sph.umich.edu rather than through a CRAN or Bioconductor helper.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/"
                                  "b/b7/RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9329
;; MALDIquant, fetched from CRAN; no inputs are declared.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.19.3")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "MALDIquant" version))
              (sha256
               (base32
                "0b7kdz3x4sdq413h1q09l1qhcvdnnwv6fqsqwllks1cd3xy34c57"))))
    (properties `((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
9355
;; ProtGenerics, fetched from Bioconductor; no inputs are declared.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "1k1ggjgx2la8b21841a4ngkp6xfxwz0czv7x960r7i1jqif8y48z"))))
    (properties `((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9375
;; mzR, fetched from Bioconductor.  The bundled boost sources are deleted
;; from the tarball by a source snippet, and src/Makevars is patched at
;; build time to link against the boost libraries from the inputs instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzR" version))
              (sha256
               (base32
                "1cwd7phlc5jbx6r6cznyfbdpvcin5fvsaasbbi65zn0s92a80r13"))
              (modules '((guix build utils)))
              ;; Remove the bundled copy of boost.
              (snippet
               '(begin
                  (delete-file-recursively "src/boost")
                  #t))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             ;; Drop the references to the bundled boost library sources and
             ;; add an ARCH_LIBS line naming the boost libraries to link.
             (substitute* "src/Makevars"
               (("\\./boost/libs.*") "")
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nARCH_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(;; Unfortunately the default boost package does not work here, not
       ;; even with mzR version 2.20.0.
       ("boost" ,boost-for-mysql)       ; replaces the bundled boost sources
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-ncdf4" ,r-ncdf4)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9429
;; affyio, fetched from Bioconductor.  zlib is a plain input and r-zlibbioc
;; is propagated.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "0sbkadxdlx7qzxc8z8iv90y6j9b2f62mk3i54dijjh56x3hjy3hb"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9453
;; affy, fetched from Bioconductor.  zlib is a plain input; the R packages,
;; including r-affyio, are propagated.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.64.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "131za66wbaz9y86gvjqcc2yd1f2ngl2b796xw726g75djhdgxgap"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9481
;; vsn, fetched from Bioconductor.  knitr is needed only at build time; the
;; other R packages are propagated.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "vsn" version))
              (sha256
               (base32
                "1naqzb2m0km8fzr6chf9z71sisrwviy1fdi9b3hn4i8p18b4kqzh"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9517
;; mzID, fetched from Bioconductor; all dependencies are propagated R
;; packages.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzID" version))
              (sha256
               (base32
                "1glcv096bn6pxlw89dlij1nzpwnjvrbxysvw2gm6qgm7rhxlaxrw"))))
    (properties `((upstream-name . "mzID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9547
;; pcaMethods, fetched from Bioconductor; all dependencies are propagated R
;; packages.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.78.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pcaMethods" version))
              (sha256
               (base32
                "1wir67kfjm0m9gf0ki8qmvh45n4gx2k0wfl9pd1hp4g62fbrj1pj"))))
    (properties `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9577
;; MSnbase, fetched from Bioconductor; all dependencies are propagated R
;; packages.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnbase" version))
              (sha256
               (base32
                "1z889xkfphqqmv31i8hh5xqyclv660ic26rfck5bjpgk3s2zzwi6"))))
    (properties `((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9620
;; Bioconductor package "MSnID".
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32
         "0m71f2y12hmwvng45kzz4r4qrgc2jbd7j9gprmw8y5laawpdaifg"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9659
;; CRAN package "Seurat".
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "Seurat" version))
       (sha256
        (base32
         "0lhjbjhv1hnx5i3gkx41k68i8ykay3f24708h30wx9xywww9lsvi"))))
    ;; The upstream name is capitalized, unlike the Guix package name.
    (properties `((upstream-name . "Seurat")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-future" ,r-future)
       ("r-future-apply" ,r-future-apply)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-ggridges" ,r-ggridges)
       ("r-httr" ,r-httr)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-leiden" ,r-leiden)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-metap" ,r-metap)
       ("r-patchwork" ,r-patchwork)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppannoy" ,r-rcppannoy)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reticulate" ,r-reticulate)
       ("r-rlang" ,r-rlang)
       ("r-rocr" ,r-rocr)
       ("r-rsvd" ,r-rsvd)
       ("r-rtsne" ,r-rtsne)
       ("r-scales" ,r-scales)
       ("r-sctransform" ,r-sctransform)
       ("r-tsne" ,r-tsne)
       ("r-uwot" ,r-uwot)))
    (home-page "https://www.satijalab.org/seurat")
    ;; A synopsis is a short noun phrase; it does not restate the package
    ;; name or form a full sentence.
    (synopsis "R toolkit for single cell genomics")
    (description
     "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
9721
;; Bioconductor package "aroma.light".
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "aroma.light" version))
              (sha256
               (base32
                "0cgdg650j4dl0b45pwaw49ib97dwjazrv9sqzkygrjmcnnfxry8x"))))
    ;; The upstream name uses a dot where the Guix name uses a hyphen.
    (properties `((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9748
;; Bioconductor package "DESeq".
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.38.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq" version))
              (sha256
               (base32
                "14pys93gsl50xmq5pc7pp1g20v3ywlg0yzkkhwb3kiy8573xn9nc"))))
    (properties `((upstream-name . "DESeq")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www-huber.embl.de/users/anders/DESeq/")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9779
;; Bioconductor package "EDASeq".
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "EDASeq" version))
              (sha256
               (base32
                "19mgzbv8yxgvw86wpq401l27q55ygawlngl775yavwccz1zbhjnj"))))
    (properties `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9818
;; Bioconductor package "interactiveDisplayBase".
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "interactiveDisplayBase" version))
              (sha256
               (base32
                "0zwf3ma6wf4zypl6bgjp0n72k2hmp0g16gzl4v3y4157rxcbpl0n"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties `((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9842
;; Bioconductor package "AnnotationHub".
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationHub" version))
              (sha256
               (base32
                "19vj3bk8jz68q84g3j8xs1s9bqz90lbwbciig1h45zvn2zc6087m"))))
    (properties `((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocversion" ,r-biocversion)
       ("r-curl" ,r-curl)
       ("r-dplyr" ,r-dplyr)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rappdirs" ,r-rappdirs)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9881
;; Bioconductor package "fastseg".  No upstream-name property is set here,
;; and the upstream name passed to bioconductor-uri is already lower-case.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "fastseg" version))
              (sha256
               (base32
                "1cys6frmbizc8bf933mwvvnr31sfya9ahcc0wm66pbd1x3mygkmk"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9911
;; Bioconductor package "KEGGREST".
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "1cgjvv9n88y3ah21356mh8z2l08vjn42hjy8hcljsibknzc4v247"))))
    ;; The upstream name is upper-case, unlike the Guix package name.
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
9935
;; Bioconductor package "gage".
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gage" version))
              (sha256
               (base32
                "1qxfmg0id19iy3ia8h5nrvk3d1azqb28kl7m08i23654wb6b45c6"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    ;; The home page URL is assembled from two string pieces.
    (home-page
     (string-append "https://bmcbioinformatics.biomedcentral.com/"
                    "articles/10.1186/1471-2105-10-161"))
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
9965
;; Bioconductor package "GenomicFiles".
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicFiles" version))
              (sha256
               (base32
                "1x6q827ms2l5lwzha1vsgfrshh35n9f19jq57xagrqlafxgpz86s"))))
    (properties `((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
9998
;; Bioconductor package "ComplexHeatmap".
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "2.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ComplexHeatmap" version))
              (sha256
               (base32
                "1pj6a6rmqckk033pkklk6hr4066rzavamy6w194rfdhind90rk0p"))))
    (properties `((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-clue" ,r-clue)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-png" ,r-png)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10030
;; Bioconductor package "DirichletMultinomial".
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DirichletMultinomial" version))
              (sha256
               (base32
                "0knmncmkkf2ypyqfcl5s8nmyyf9nrzkqprzn9w3w8182c0v49r0s"))))
    (properties `((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    ;; gsl is a plain (non-propagated) input, unlike the R packages below.
    (inputs `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10059
;; Bioconductor package "ensembldb".
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.10.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32
         "02lnpyp85zchmz404hr5381zmihvq4x9zgxdrbn2afi352vg0vab"))))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotations for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data is
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
that allows retrieving annotations for specific entries like genes encoded on
a chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
10102
;; Bioconductor package "OrganismDbi".
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32
         "1bvfyh733mhka9zd00hrzpalgjs255c2blnxyf60ipzk5jg7yllb"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10134
;; Bioconductor package "biovizBase".
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.34.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biovizBase" version))
              (sha256
               (base32
                "04vvj907bgs67w8rb7n1haf80p6cd0qj5fdxw0dwryb455y35vir"))))
    (properties `((upstream-name . "biovizBase")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10176
;; Bioconductor package "ggbio".  A custom build phase patches one source
;; file after unpacking.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ggbio" version))
              (sha256
               (base32
                "13wzwh40anh8l53yp19bg4w5cpxykcaf228dc8cxvjndyib711qb"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; See https://github.com/tengfei/ggbio/issues/117
         ;; This fix will be included in the next release.
         (add-after 'unpack 'fix-typo
           (lambda _
             ;; Replace the misspelled "fechable" with "fetchable".
             (substitute* "R/GGbio-class.R"
               (("fechable") "fetchable"))
             #t)))))
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10239
;; CRAN package "gProfileR".
(define-public r-gprofiler
  (package
    (name "r-gprofiler")
    (version "0.7.0")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "gProfileR" version))
              (sha256
               (base32
                "1h1v0kgpsn04ald2izznh7fr2riwisj5hcgz4k7h3qc931rf0r4k"))))
    ;; The upstream name is mixed-case, unlike the Guix package name.
    (properties `((upstream-name . "gProfileR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-plyr" ,r-plyr)
       ("r-rcurl" ,r-rcurl)))
    (home-page "https://cran.r-project.org/web/packages/gProfileR/")
    (synopsis "Interface to the g:Profiler toolkit")
    (description
     "This package provides tools for functional enrichment analysis,
gene identifier conversion and mapping homologous genes across related
organisms via the @code{g:Profiler} toolkit.")
    (license license:gpl2+)))
10263
;; Bioconductor package "gQTLBase".
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLBase" version))
              (sha256
               (base32
                "1qr8dqjbmj1mdjbzbnxwzfrm8f02wqfsgic8ws5kv7pmsby63y4x"))))
    (properties `((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10298
;; Bioconductor package "snpStats".
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "snpStats" version))
              (sha256
               (base32
                "1xq1rjljg70h5mshdza56dis0iv1a20sivs6dav3w5jbdd1l5qkh"))))
    (properties `((upstream-name . "snpStats")))
    (build-system r-build-system)
    ;; zlib is a plain (non-propagated) input, unlike the R packages below.
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10325
;; Bioconductor annotation package "Homo.sapiens".
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "Homo.sapiens_"
                           version ".tar.gz"))
       (sha256
        (base32
         "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    ;; The upstream name uses a dot where the Guix name uses a hyphen.
    (properties `((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene"
        ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10357
;; Bioconductor package "erma".
(define-public r-erma
  (package
    (name "r-erma")
    (version "1.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "erma" version))
              (sha256
               (base32
                "085qsr73p8nyp435f15l4l1jkfd64bfd9gl4z496nfxdnqn95srz"))))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10395
;; Bioconductor package "ldblock".
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ldblock" version))
              (sha256
               (base32
                "0xpigfidmylfawy6vzshqnsw1lzjs4qms8q7zffij6bkvkv7920x"))))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-ensdb-hsapiens-v75" ,r-ensdb-hsapiens-v75)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-httr" ,r-httr)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10427
;; Bioconductor package "gQTLstats".
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLstats" version))
              (sha256
               (base32
                "1dly4p9r4231hf31xg1nzqiyvjbcfjljfmhb88ic1jxwnvniyv2f"))))
    (properties `((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10481
;; Bioconductor package "Gviz"; the capitalised upstream name is kept in the
;; `upstream-name' property.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.30.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Gviz" version))
       (sha256
        (base32
         "0c9i26h5czm60n1bxzmdxxpywcj0sig6wcj913pb41mr83bbgra3"))))
    (properties `((upstream-name . "Gviz")))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    (description
     "Genomic data analyses requires integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10527
;; Bioconductor package "gwascat", fetched through `bioconductor-uri' and
;; built with the R build system.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gwascat" version))
       (sha256
        (base32
         "038vhfsk2vs7inn5di093cmjbb81k7j0af385sg7l01jj70bdqq1"))))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10558
;; Bioconductor package "Sushi"; the capitalised upstream name is kept in
;; the `upstream-name' property.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32
         "15xng21hd09fb234ravrry3b872zg82w8x9lijxab9n96xihcpz5"))))
    (properties `((upstream-name . "Sushi")))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10580
;; Bioconductor package "FitHiC"; the mixed-case upstream name is kept in
;; the `upstream-name' property.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32
         "1irwkwi4afdj395134k31mvx7c2vpdd0rv8zrblnldascdsb04kc"))))
    (properties `((upstream-name . "FitHiC")))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10604
;; Bioconductor package "HiTC"; the mixed-case upstream name is kept in the
;; `upstream-name' property.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32
         "0byahi0fz0dzjyklz8v9whax9ygg7gwb4pl1j3zbl6z8a9qx8pps"))))
    (properties `((upstream-name . "HiTC")))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10633
;; Bioconductor package "HDF5Array"; the mixed-case upstream name is kept in
;; the `upstream-name' property.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.14.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HDF5Array" version))
       (sha256
        (base32
         "1z153a7nxmlml72pl1saasj2il9g5ahpynkpv3mkhhsvl5kbwbh6"))))
    (properties `((upstream-name . "HDF5Array")))
    (build-system r-build-system)
    ;; zlib is a plain input; the R packages below are propagated.
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10663
;; Bioconductor package "Rhdf5lib".  The source snippet removes the bundled
;; Windows binaries, and a custom phase points the build at the "hdf5" input
;; (hdf5-1.10) while unpacking the matching HDF5 source tree for its headers.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rhdf5lib" version))
       (sha256
        (base32
         "17lhwnm9rqsvbqkvwp0m07vjrk63a4389p2y39zffv8fgznxqzd7"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the binaries shipped in the tarball.
           (delete-file-recursively "src/winlib/")
           #t))))
    (properties `((upstream-name . "Rhdf5lib")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Store directory of the "hdf5" input; every static library
             ;; path written into Makevars below is rooted here.
             (let ((hdf5 (assoc-ref inputs "hdf5")))
               (for-each delete-file '("configure" "configure.ac"))
               ;; Keep dependent packages from linking with the proprietary
               ;; libsz: each libsz.a reference becomes a second libhdf5.a.
               (substitute* "R/zzz.R"
                 (("'\"%s/libhdf5.a\" \"%s/libsz.a\" -lz'")
                  "'\"%s/libhdf5.a\" \"%s/libhdf5.a\" -lz'")
                 (("'\"%s/libhdf5_cpp.a\" \"%s/libhdf5.a\" \"%s/libsz.a\" -lz'")
                  "'\"%s/libhdf5_cpp.a\" \"%s/libhdf5.a\" \"%s/libhdf5.a\" -lz'")
                 (("'%s/libhdf5_hl.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_hl.a %s/libhdf5.a %s/libhdf5.a -lz'")
                 (("'%s/libhdf5_hl_cpp.a %s/libhdf5_hl.a %s/libhdf5_cpp.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_hl_cpp.a %s/libhdf5_hl.a %s/libhdf5_cpp.a %s/libhdf5.a %s/libhdf5.a -lz'"))
               (with-directory-excursion "src"
                 ;; Unpack the HDF5 sources under the fixed name "hdf5".
                 (invoke "tar" "xvf" (assoc-ref inputs "hdf5-source"))
                 (rename-file (string-append "hdf5-" ,(package-version hdf5-1.10))
                              "hdf5")
                 ;; Strip the timestamp and host details so that the build
                 ;; is reproducible.
                 (substitute* "hdf5/src/libhdf5.settings.in"
                   (("Configured on: @CONFIG_DATE@")
                    "Configured on: Guix")
                   (("Uname information:.*")
                    "Uname information: Linux\n")
                   ;; Avoid an unnecessary store reference.
                   (("C Compiler:.*")
                    "C Compiler: GCC\n"))
                 ;; Makevars.in must be renamed before it is patched.
                 (rename-file "Makevars.in" "Makevars")
                 (substitute* "Makevars"
                   (("HDF5_CXX_LIB=.*")
                    (string-append "HDF5_CXX_LIB="
                                   hdf5 "/lib/libhdf5_cpp.a\n"))
                   (("HDF5_LIB=.*")
                    (string-append "HDF5_LIB="
                                   hdf5 "/lib/libhdf5.a\n"))
                   (("HDF5_CXX_INCLUDE=.*") "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                   (("HDF5_INCLUDE=.*") "HDF5_INCLUDE=./hdf5/src\n")
                   (("HDF5_HL_INCLUDE=.*") "HDF5_HL_INCLUDE=./hdf5/hl/src\n")
                   (("HDF5_HL_CXX_INCLUDE=.*") "HDF5_HL_CXX_INCLUDE=./hdf5/hl/c++/src\n")
                   (("HDF5_HL_LIB=.*")
                    (string-append "HDF5_HL_LIB="
                                   hdf5 "/lib/libhdf5_hl.a\n"))
                   (("HDF5_HL_CXX_LIB=.*")
                    (string-append "HDF5_HL_CXX_LIB="
                                   hdf5 "/lib/libhdf5_hl_cpp.a\n"))
                   ;; szip is non-free software
                   (("cp \"\\$\\{SZIP_LIB\\}.*") "")
                   (("PKG_LIBS =.*") "PKG_LIBS = -lz -lhdf5\n")))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("hdf5" ,hdf5-1.10)))
    ;; The HDF5 source tarball that the phase above unpacks.
    (native-inputs
     `(("hdf5-source" ,(package-source hdf5-1.10))))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10746
;; Bioconductor package "beachmat", fetched through `bioconductor-uri' and
;; built with the R build system.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "beachmat" version))
       (sha256
        (base32
         "1bpnlw2kdy9yc2vq948k980r0j25ipb80llhvn0j3kxjiwyfgs3i"))))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-matrix" ,r-matrix)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10769
;; Bioconductor package "SingleCellExperiment"; the mixed-case upstream name
;; is kept in the `upstream-name' property.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SingleCellExperiment" version))
       (sha256
        (base32
         "11pqb3cigi9xbhxq2k3n7z23v1ibd03ws1lcrh5c5ffgb33nlyw5"))))
    (properties
     `((upstream-name . "SingleCellExperiment")))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10796
;; Bioconductor package "scater".  The tarball comes from Bioconductor while
;; the home page points at the project's GitHub repository.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.14.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scater" version))
       (sha256
        (base32
         "0sxd1s8wdlj9926bagq4crjrk1nnmh3j3bhgrw160zfgc3y8pzck"))))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
10830
;; Bioconductor package "scran", fetched through `bioconductor-uri' and
;; built with the R build system.
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.14.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scran" version))
       (sha256
        (base32
         "1y8wlgk5zbv7c7gcp0ahfpbh9lifab7y3zwf0093fzaw7vr1y6cr"))))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-bh" ,r-bh)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dqrng" ,r-dqrng)
       ("r-edger" ,r-edger)
       ("r-igraph" ,r-igraph)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
10870
;; Bioconductor package "DelayedMatrixStats"; the mixed-case upstream name
;; is kept in the `upstream-name' property.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedMatrixStats" version))
       (sha256
        (base32
         "0mv2rl6a6l404piabcazxz1s6ars016pxhjf5v40hhr6y1r0wbqy"))))
    (properties
     `((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time is minimized.")
    (license license:expat)))
10904
;; "phangorn" is fetched through `cran-uri', unlike the surrounding
;; packages, which use `bioconductor-uri'.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.5.5")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "phangorn" version))
       (sha256
        (base32
         "0ihkaykqjmf80d8wrk3saphxvnv58zma6pd13633bd3cwanc33f5"))))
    (build-system r-build-system)
    ;; R packages propagated together with this one.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
10932
;; "dropbead" is built from a pinned Git commit.  The version string is
;; "0-" followed by the revision and the first seven characters of the
;; commit hash.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      (version (string-append "0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      ;; R packages propagated together with this one.
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
10962
;; Private (not exported) htslib variant built from a pinned commit of the
;; lomereiter fork.  It inherits every other field from `htslib' and adds
;; autoconf and automake in front of htslib's own native inputs.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lomereiter/htslib.git")
               (commit commit)))
         ;; The checkout is named after "htslib", not after this variant.
         (file-name (string-append "htslib-" version "-checkout"))
         (sha256
          (base32
           "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
10983
;; Sambamba, built from the upstream Git tag "v<version>".  There is no
;; configure script; the Makefile is patched to use ldc2 and to link with
;; the htslib and lz4 inputs instead of the bundled copies.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.7.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lomereiter/sambamba.git")
             (commit (string-append "v" version))))
       ;; Use the standard helper, like the other Git checkouts in this
       ;; file; it yields the same "NAME-VERSION-checkout" file name.
       (file-name (git-file-name name version))
       (sha256
        (base32
         "111h05b60pj8dxbidiamy4imc92x2962b3lmb7wgysl6lx064qis"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             ;; Replace the "/usr/bin/env ..." interpreter reference with
             ;; the python3 found in PATH.
             (substitute* "gen_ldc_version_info.py"
               (("/usr/bin/env.*") (which "python3")))
             (substitute* "Makefile"
               ;; We use ldc2 instead of ldmd2 to compile sambamba.
               (("\\$\\(shell which ldmd2\\)") (which "ldc2")))
             #t))
         ;; Despite the phase name, only the BioD sources are copied here.
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link with the htslib and lz4 inputs and drop the targets
             ;; that would build the bundled static libraries.
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" lz4-static htslib-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The build leaves a versioned binary in bin/; install it
             ;; under the plain name "sambamba".
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (copy-file (string-append "bin/sambamba-" ,version)
                          (string-append bin "/sambamba"))
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python)
       ;; BioD sources pinned to a commit; copied into the build tree by
       ;; the 'place-biod-and-undead phase.
       ("biod"
        ,(let ((commit "7969eb0a847b05874e83ffddead26e193ece8101"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD.git")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "0mjxsmbmv0jxl3pq21p8j5r829d648if8q58ka50b2956lc6qkpm")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "https://lomereiter.github.io/sambamba/")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
11060
;; Ritornello, built from the upstream Git tag "v<version>".  There is no
;; configure script and no test suite; the single binary is installed by
;; hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/Ritornello.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             ;; Include sam.h through the "samtools/" prefix.
             (substitute* '("src/SamStream.h"
                            "src/FLD.cpp")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p bindir)
               (install-file "bin/Ritornello" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
11105
;; Trim Galore is a single script: nothing is compiled, the script is
;; patched to reference its tools by absolute store path and then copied
;; to bin/.  The source is a Git checkout, so no unpacking tool such as
;; unzip is required as a native input.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.6.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Trim Galore tries to figure out what version of Python
             ;; cutadapt is using by looking at the shebang.  Of course that
             ;; doesn't work, because cutadapt is wrapped in a shell script.
             (substitute* "trim_galore"
               (("my \\$python_return.*")
                "my $python_return = \"Python 3.999\";\n"))
             #t))
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the script at the cutadapt, gzip, gunzip and pigz
             ;; executables of the corresponding inputs.
             (substitute* "trim_galore"
               (("\\$path_to_cutadapt = 'cutadapt'")
                (string-append "$path_to_cutadapt = '"
                               (assoc-ref inputs "cutadapt")
                               "/bin/cutadapt'"))
               (("\\$compression_path = \"gzip\"")
                (string-append "$compression_path = \""
                               (assoc-ref inputs "gzip")
                               "/bin/gzip\""))
               (("\"gunzip")
                (string-append "\""
                               (assoc-ref inputs "gzip")
                               "/bin/gunzip"))
               (("\"pigz")
                (string-append "\""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; `install-file' creates the target directory itself, so no
             ;; separate `mkdir-p' is needed.
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (install-file "trim_galore" bin)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("pigz" ,pigz)
       ("cutadapt" ,cutadapt)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
11175
;; GESS is a collection of Python 2 scripts.  Nothing is configured or
;; compiled: the whole source tree is copied into a site-packages
;; directory, GESS.py is made executable and wrapped, and a symlink to it
;; is placed in bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://compbio.uthscsa.edu/"
                           "GESS_Web/files/"
                           "gess-" version ".src.tar.gz"))
       (sha256
        (base32
         "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    ;; The "python" input is python-2, so the directory
                    ;; name must be derived from that package's version
                    ;; rather than from the default `python' variable.
                    (target (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-script (string-append target "GESS.py")
                 `("PYTHONPATH" ":" = (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)
       ("guile" ,guile-3.0)))           ; for the script wrapper
    (home-page "https://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
11240
;; PHYLIP is built from its "src" subdirectory with Makefile.unx.  The
;; make flags already include the "install" target, and the custom install
;; phase then copies everything found in ../exe to bin/.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "../exe" ".*")))
             #t)))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
11277
;; IMP, built with CMake against Python 2; the test suite is disabled (see
;; the FIXME below).
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     `(;; FIXME: Some tests fail because they produce warnings, others fail
       ;; because the PYTHONPATH does not include the modeller's directory.
       #:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    ;; Python 2 libraries propagated together with this package.
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license.  Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
11321
;; Package definition for TADbit, built from a tagged git checkout with the
;; Python build system using Python 2.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/3DGenomes/TADbit.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "07g3aj648prmsvxp9caz5yl41k0y0647vxh0f5p3w8376mfiljd0"))))
    (build-system python-build-system)
    (arguments
     `(;; The bundled tests are meant to be run after installation, but
       ;; they are incomplete and therefore cannot be run at all.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completions-dir
                    (string-append (assoc-ref outputs "out")
                                   "/etc/bash_completion.d")))
               ;; setup.py opens both of these files for writing.
               (for-each (lambda (file) (chmod file #o664))
                         '("_pytadbit/_version.py" "README.rst"))

               ;; Install the bash completions into the package output
               ;; instead of the home directory, under a non-hidden name.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completions-dir "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
11380
;; Package definition for kentUtils.  A source snippet strips every
;; directory under "src" that is not on the list of free directories and
;; trims the makefiles accordingly.
(define-public kentutils
  (package
    (name "kentutils")
    ;; Version 302.1.0 exists, but it differs only in that it ships
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the directories listed in FREE are free for all uses;
           ;; every other directory below "src" is removed.  "hg/autoSql"
           ;; and "hg/autoXml" are nominally free, but they need a library
           ;; built from the nonfree sources in "hg/lib".
           (let* ((free '("." ".."
                          "utils" "lib" "inc" "tagStorm"
                          "parasol" "htslib"))
                  (not-free? (lambda (entry)
                               (not (member entry free))))
                  (directory? (lambda (file)
                                (eq? 'directory (stat:type (stat file))))))
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           (when (directory? path)
                             (delete-file-recursively path))))
                       (scandir "src" not-free?)))
           ;; Build the "utils" target rather than "userApps", which
           ;; needs libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Restrict the library build to the free libraries.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; Both of these tools need "jkhgap.a", which comes from the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Skip the UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; No global test target exists, and the per-tool test targets
       ;; rely on input files that are not shipped.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         ;; Copy the separately fetched samtabix sources into the tree.
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((samtabix (assoc-ref inputs "samtabix")))
               (copy-recursively samtabix "samtabix"))
             #t))
         (delete 'configure)
         ;; Installation is just a copy of the "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (copy-recursively "bin"
                               (string-append (assoc-ref outputs "out")
                                              "/bin"))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb" ,mariadb "lib")
       ("mariadb-dev" ,mariadb "dev")
       ("openssl" ,openssl-1.0)))
    (home-page "https://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Just a subset of the sources is under a non-copyleft free software
    ;; license; the snippet removes everything else.  This bug report
    ;; explains how the license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
11497
;; Package definition for F-Seq, built with Ant from a pinned git commit.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      (version (string-append "1.1-" revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Drop the bundled Java library archives: every file below
                ;; "lib" is deleted.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin"))
                      (docdir (string-append out "/share/doc/f-seq"))
                      (libdir (string-append out "/lib"))
                      (cli-jar (string-append
                                (assoc-ref inputs "java-commons-cli")
                                "/share/java/commons-cli.jar")))
                 (for-each mkdir-p (list out docdir))
                 ;; Point the launcher script at the java executable found
                 ;; on PATH, at the commons-cli jar from the inputs, and at
                 ;; the installed "bin" directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar") cli-jar)
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bindir "\n")))
                 (install-file "README.txt" docdir)
                 (install-file "bin/linux/fseq" bindir)
                 (install-file "build~/fseq.jar" libdir)
                 (copy-recursively "lib" libdir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
11556
;; Package definition for Bismark.  Nothing is compiled: the 'configure and
;; 'build phases are deleted and the Perl scripts are installed by hand.
(define-public bismark
  (package
    (name "bismark")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:modules ((guix build utils)
                  (ice-9 popen)
                  (srfi srfi-26)
                  (guix build perl-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled plotly.js is minified, so it is regenerated by
         ;; running uglify-js on the plotly.js source from the inputs.
         (add-after 'unpack 'replace-plotly.js
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((file (assoc-ref inputs "plotly.js"))
                    (installed "plotly/plotly.js")
                    (minified (open-pipe* OPEN_READ "uglify-js" file)))
               (call-with-output-file installed
                 (cut dump-port minified <>))
               ;; Close the pipe and check the exit status.  Without this
               ;; a failed uglify-js run would go unnoticed and leave an
               ;; empty or truncated plotly.js behind.
               (unless (zero? (close-pipe minified))
                 (error "uglify-js failed to process file" file)))
             #t))
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/bismark"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (docs '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary"
                               "NOMe_filtering")))
               ;; Make bismark2report look for plotly in the installed
               ;; share directory rather than next to the script.
               (substitute* "bismark2report"
                 (("\\$RealBin/plotly")
                  (string-append share "/plotly")))
               (mkdir-p share)
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin))
                         scripts)
               (for-each (lambda (file) (install-file file docdir))
                         docs)
               (copy-recursively "Docs/Images" (string-append docdir "/Images"))
               (copy-recursively "plotly"
                                 (string-append share "/plotly"))

               ;; Fix references to gunzip in the installed scripts.
               (substitute* (map (lambda (file)
                                   (string-append bin "/" file))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl-carp" ,perl-carp)
       ("perl-getopt-long" ,perl-getopt-long)))
    (native-inputs
     `(("plotly.js"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
                               "v1.39.4/dist/plotly.js"))
           (sha256
            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
       ("uglify-js" ,uglify-js)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
11661
;; Package definition for PAML.  The tarball has no configure script; the
;; 'configure phase is replaced by one that patches a hard-coded shell path
;; and enters the "src" directory, where the build then takes place.
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Replace the hard-coded /bin/bash with the bash found on
             ;; PATH, then build from within "src".
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               ;; The working directory is "src" at this point, so the
               ;; documentation is one level up.
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11709
;; Package definition for kallisto, built with CMake and linked against the
;; htslib input instead of the copy bundled under "ext/htslib".
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.44.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pachterlab/kallisto.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0nj382jiywqnpgvyhichajpkkh5r0bapn43f4dx40zdaq5v4m40m"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Wrap the ExternalProject_Add form in a conditional that is
             ;; never true and comment out the include directory that
             ;; refers to the bundled htslib prefix.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" all)
                (string-append "if (NEVER)\n" all))
               (("^\\)")
                ")\nendif(NEVER)")
               (("include_directories\\(\\$\\{htslib_PREFIX.*" all)
                (string-append "# " all)))
             ;; Link against "hts" rather than the bundled static archive
             ;; and drop the include directory of the bundled copy.
             (substitute* "src/CMakeLists.txt"
               (("target_link_libraries\\(kallisto kallisto_core pthread \
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)") ""))
             #t)))))
    (inputs
     `(("hdf5" ,hdf5)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11758
;; Package definition for libgff, built with CMake from a tagged git
;; checkout.
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kingsford-Group/libgff.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6vfjnq7a2mianipscbshrvbncss8z4zkgkbjw754p9043nfkps"))))
    (build-system cmake-build-system)
    (arguments
     ;; The sources do not include any tests.
     '(#:tests? #f))
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
11781
;; Package definition for Sailfish.  The upstream CMake build wants to
;; download and build its dependencies; the phases below unpack RapMap from
;; the inputs and redirect the remaining references to the packaged
;; libraries.
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/kingsfordgroup/sailfish.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         ;; Unpack the RapMap input and place its sources and headers
         ;; under "external/install".
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p "/tmp/rapmap")
               (invoke "tar" "xf"
                       rapmap
                       "-C" "/tmp/rapmap"
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
               (copy-recursively "/tmp/rapmap/include" include))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("src/SailfishIndexer.cpp"
                            "src/SailfishUtils.cpp"
                            "src/SailfishQuantify.cpp"
                            "src/FASTAParser.cpp"
                            "include/PCA.hpp"
                            "include/SailfishUtils.hpp"
                            "include/SailfishIndex.hpp"
                            "include/CollapsedEMOptimizer.hpp"
                            "src/CollapsedEMOptimizer.cpp")
               (("#include \"jellyfish/config.h\"") ""))
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (string-append (assoc-ref inputs "jellyfish")
                               "/include/jellyfish-" ,(package-version jellyfish)))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (string-append (assoc-ref inputs "jellyfish")
                               "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so")))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("find_package\\(Jellyfish.*") "")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libsparsehash") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message("))

             ;; Ensure that Eigen headers can be found.  The Eigen
             ;; directory is put in front of any existing value of
             ;; CPLUS_INCLUDE_PATH instead of replacing it, so that search
             ;; path entries already set for the build are not lost.
             (let ((eigen (string-append (assoc-ref inputs "eigen")
                                         "/include/eigen3"))
                   (existing (getenv "CPLUS_INCLUDE_PATH")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (if existing
                           (string-append eigen ":" existing)
                           eigen)))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
11930
;; Non-public variant of libstadenio, fetched from the COMBINE-lab
;; repository and used as an input of the salmon package.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments
     ;; Running the tests in parallel is not supported.
     '(#:parallel-tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     ;; Perl is needed by the tests.
     `(("perl" ,perl)))
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
11971
;; Package definition for Salmon.  The upstream CMake build wants to
;; download and build its dependencies; the phases below copy RapMap from
;; the inputs and redirect the remaining references to the packaged
;; libraries.
(define-public salmon
  (package
    (name "salmon")
    (version "0.13.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/salmon.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1i2z4aivicmiixdz9bxalp7vmfzi3k92fxa63iqa8kgvfw5a4aq5"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             "-DTBB_LIBRARIES=tbb tbbmalloc"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.59\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         ;; Copy the RapMap sources and headers from the inputs into
         ;; "external/install", then delete a fixed list of files and the
         ;; spdlog directory from the copy.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src)
               (mkdir-p include)
               (copy-recursively (string-append rapmap "/src") src)
               (copy-recursively (string-append rapmap "/include") include)
               (for-each delete-file '("external/install/include/rapmap/xxhash.h"
                                       "external/install/include/rapmap/FastxParser.hpp"
                                       "external/install/include/rapmap/concurrentqueue.h"
                                       "external/install/include/rapmap/FastxParserThreadUtils.hpp"
                                       "external/install/src/rapmap/FastxParser.cpp"
                                       "external/install/src/rapmap/xxhash.c"))
               (delete-file-recursively "external/install/include/rapmap/spdlog"))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               ;; Don't download anything
               (("DOWNLOAD_COMMAND") "DOWNLOAD_COMMAND echo")
               (("externalproject_add\\(libcereal") "message(")
               (("externalproject_add\\(libgff") "message(")
               (("externalproject_add\\(libtbb") "message(")
               (("externalproject_add\\(libdivsufsort") "message(")
               (("externalproject_add\\(libstadenio") "message(")
               (("externalproject_add_step\\(") "message("))
             (substitute* "src/CMakeLists.txt"
               (("add_dependencies") "#")
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                (string-append (assoc-ref inputs "libstadenio-for-salmon")
                               "/lib/libstaden-read.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so"))
               (("lib/libdivsufsort.a") "/lib/libdivsufsort.so"))

             ;; Ensure that all headers can be found.  `getenv' returns #f
             ;; when CPATH is unset, which `string-append' cannot handle,
             ;; so only append to CPATH when it already has a value.
             (let ((eigen (string-append (assoc-ref inputs "eigen")
                                         "/include/eigen3"))
                   (cpath (getenv "CPATH")))
               (setenv "CPATH"
                       (if cpath
                           (string-append cpath ":" eigen)
                           eigen)))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap.git")
                          (commit (string-append "salmon-v" version))))
                    (file-name (string-append "rapmap-salmon-v" version "-checkout"))
                    (sha256
                     (base32
                      "1biplxf0csc7a8h1wf219b0vmjkvw6wk2zylhdklb577kgmihdms"))))
       ("jemalloc" ,jemalloc)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
12116
;; Package definition for loompy, built with the Python build system and
;; tested with pytest.
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.17")
    ;; The tests are missing from the tarball on Pypi, so the sources are
    ;; taken from the git repository.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/linnarsson-lab/loompy.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; Put the current directory at the front of PYTHONPATH
             ;; before running pytest on the "tests" directory.
             (let ((search-path (string-append (getcwd) ":"
                                               (getenv "PYTHONPATH"))))
               (setenv "PYTHONPATH" search-path)
               (invoke "pytest" "tests")
               #t))))))
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
12157
12158 ;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      ;; There is no release number to build on, so the version is derived
      ;; from the revision and the abbreviated commit ("0-1.86b07ef").
      (version (git-version "0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/cmzmasek/forester.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Delete bundled jars and pre-built classes
                    (delete-file-recursively "forester/java/resources")
                    (delete-file-recursively "forester/java/classes")
                    (for-each delete-file (find-files "forester/java/" "\\.jar$"))
                    ;; Delete bundled applications
                    (delete-file-recursively "forester_applications")
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f ; there are none
         #:jdk ,icedtea-8
         ;; The sxml modules are needed by the 'fix-dependencies phase,
         ;; which rewrites build.xml.
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; The Java sources and build.xml live in a sub-directory.
           (add-after 'unpack 'chdir
             (lambda _ (chdir "forester/java") #t))
           ;; Parse build.xml, drop the <unjar> elements and write the
           ;; result back in place of the original file.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (call-with-output-file "build.xml.new"
                 (lambda (port)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     `(;; Remove all unjar tags to avoid repacking classes.
                       (unjar . ,(lambda _ '()))
                       ;; Every other element and all text are passed
                       ;; through unchanged.
                       (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                       (*text* . ,(lambda (_ txt) txt))))
                    port)))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               ;; Replace the call into the deleted class with an exception.
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
12233
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://repo1.maven.org/maven2/"
                           "org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32
         "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; there are none
       #:jdk ,icedtea-8
       ;; The sxml modules provide the XML reader, writer and tree
       ;; transformer used by the 'fix-dependencies phase.
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml and put the
         ;; separately downloaded resource files next to it.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (call-with-output-file "build.xml"
               (lambda (out-port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "src/build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   (list
                    ;; Remove all unjar tags to avoid repacking classes.
                    (cons 'unjar (lambda _ '()))
                    ;; Keep every other element and all text as they are.
                    (cons '*default* (lambda (tag . kids) (cons tag kids)))
                    (cons '*text* (lambda (_ txt) txt))))
                  out-port)))
             ;; Each resource input is named after the file it provides.
             (for-each (lambda (resource)
                         (copy-file (assoc-ref inputs resource) resource))
                       '("synth_look_and_feel_1.xml"
                         "phyloxml.xsd"))
             ;; Point the build file at the local copies.
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; Throw instead of calling the deleted exporter; the rest of
             ;; the original statement is wrapped in a Java comment.
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _ (delete-file-recursively "src/classes") #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/"
                               "synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
12329
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    ;; The checkout holds several biojava modules; the arguments below
    ;; build only the "biojava-core" sub-directory.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biojava/biojava")
                    (commit (string-append "biojava-" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude (list "**/SearchIOTest.java"
                            "**/BlastXMLParserTest.java"
                            "**/GenbankCookbookTest.java"
                            "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class directory
         ;; before building.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the resources used by the tests.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
12385
(define-public java-biojava-phylo
  (package
    ;; Version, source and license are taken from java-biojava-core; only
    ;; the "biojava-phylo" sub-directory of that source is built here.
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Same for the resources used by the tests.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12421
(define-public java-biojava-alignment
  (package
    ;; Version, source and license are taken from java-biojava-core; only
    ;; the "biojava-alignment" sub-directory of that source is built here.
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Same for the resources used by the tests.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12465
;; Variant of java-biojava-core at version 4.0.0.  Everything except the
;; version and the source is inherited.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biojava/biojava")
                    (commit (string-append "biojava-" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
12479
(define-public java-biojava-phylo-4.0
  (package
    ;; Built from the 4.0.0 source of java-biojava-core-4.0, against the
    ;; 4.0 core library and forester 1.005.
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Same for the resources used by the tests.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12515
(define-public java-biojava-alignment-4.0
  (package
    ;; Built from the 4.0.0 source of java-biojava-core-4.0, against the
    ;; 4.0 core and phylo libraries and forester 1.005.
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Same for the resources used by the tests.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12559
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Collect the directories of all inputs whose label starts
             ;; with "java-", except for java-testng.
             (let ((dirs (filter-map (match-lambda
                                       ((name . dir)
                                        (if (and (string-prefix? "java-" name)
                                                 (not (string=? name "java-testng")))
                                            dir #f)))
                                     inputs)))
               ;; Link every jar found in those directories into jar/lib.
               (for-each (lambda (jar)
                           (symlink jar (string-append "jar/lib/" (basename jar))))
                         (append-map (lambda (dir) (find-files dir "\\.jar$"))
                                     dirs)))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin     (string-append out "/bin"))
                    (share   (string-append out "/share/java/"))
                    ;; SHARE already ends in a slash, so do not add a
                    ;; second one.
                    (lib     (string-append share "lib/"))
                    (scripts (list "BAMTagHistogram"
                                   "BAMTagofTagCounts"
                                   "BaseDistributionAtReadPosition"
                                   "CollapseBarcodesInPlace"
                                   "CollapseTagWithContext"
                                   "ConvertToRefFlat"
                                   "CreateIntervalsFiles"
                                   "DetectBeadSynthesisErrors"
                                   "DigitalExpression"
                                   "Drop-seq_alignment.sh"
                                   "FilterBAM"
                                   "FilterBAMByTag"
                                   "GatherGeneGCLength"
                                   "GatherMolecularBarcodeDistributionByGene"
                                   "GatherReadQualityMetrics"
                                   "PolyATrimmer"
                                   "ReduceGTF"
                                   "SelectCellsByNumTranscripts"
                                   "SingleCellRnaSeqMetricsCollector"
                                   "TagBamWithReadSequenceExtended"
                                   "TagReadWithGeneExon"
                                   "TagReadWithInterval"
                                   "TrimStartingSequence"
                                   "ValidateReference")))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; In the installed scripts, use the absolute file name of
               ;; "java" and point jar_deploy_dir at the installed jars.
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    (lib   (string-append share "lib/")))
               ;; jar/lib holds the symlinks made by 'record-references;
               ;; recreate each one, with the same target, in the output.
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib (basename jar))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12700
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "0z3hr120wk2vrlmlpz1vp3n9wy3rq4y2mnzh2vf08qgqn2xfdwcw"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             ;; Edit the test entries in Makefile.in: keep the
             ;; trim_galore entry without whatever follows it on the
             ;; line, and remove the multiqc and top-level entries.
             (substitute* "Makefile.in"
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
12769
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.42")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32
         "0xbvgqpk32a8iczhvac56cacr46rdkqb0allhhpvmj940idf72bi"))))
    (build-system gnu-build-system)
    ;; The test suite is disabled because parts of it rely on access to
    ;; the network.
    (arguments '(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
12839
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                           "releases/download/v" version
                           "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32
         "0l97wvkq4diq8lcarraj33bby1zzf0w804jwi8mlc5qddp8idwhy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The readr package is picky about timezones, so set a time zone
         ;; and the location of the zoneinfo database before the tests run.
         (add-before 'check 'set-timezone
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo))
             #t)))))
    ;; tzdata is used by the 'set-timezone phase above.
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
12898
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "1.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                           "releases/download/v" version
                           "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "1d5l3gywypi67yz9advxq5xkgfhr4733gj0bwnngm723i3hdf5w9"))))
    ;; No build arguments are given, so the gnu-build-system defaults
    ;; apply.
    (build-system gnu-build-system)
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("fastqc" ,fastqc)
       ("flexbar" ,flexbar)
       ("java" ,icedtea-8)
       ("jellyfish" ,jellyfish)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake)
       ("star" ,star)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-seurat" ,r-seurat)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
12964
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                           "releases/download/v" version
                           "/pigx-" version ".tar.gz"))
       (sha256
        (base32
         "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    ;; Python plus the four individual PiGx pipelines.
    (inputs
     `(("python" ,python)
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
13000
(define-public genrich
  (package
    (name "genrich")
    (version "0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/jsh58/Genrich.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is skipped altogether.
         (delete 'configure)
         ;; Install the "Genrich" executable into the output's bin
         ;; directory by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "Genrich" bin))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/jsh58/Genrich")
    (synopsis "Detecting sites of genomic enrichment")
    (description "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
    (license license:expat)))
13033
(define-public mantis
  ;; Built from a pinned commit; the version string is derived from the
  ;; revision and commit with 'git-version'.
  (let ((revision "1")
        (commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84"))
    (package
      (name "mantis")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/splatlab/mantis.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f))                  ; there are none
      (inputs
       `(("sdsl-lite" ,sdsl-lite)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      ;; The code relies on __uint128_t and inline assembly, hence the
      ;; restriction to x86_64.
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3))))
13068
;; CRAN package "diversitree".
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-13")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "00vi4klywi35hd170ksjv3xja3hqqbkcidcnrrlpgv4179k0azix"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs
     `(("fftw" ,fftw)
       ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    ;; A period was added after "BiSSE-ness"; without it the list of methods
    ;; ran straight into the following sentence.
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\". Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))
13099
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full.git")
               (commit commit)))
         ;; Use 'git-file-name' like the other git checkouts in this file
         ;; instead of spelling out "<name>-<version>-checkout" by hand.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         ;; Point the makefile's SAMTOOLS_DIR at the samtools input's lib
         ;; directory.
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, patch the makefile: the
           ;; include flag is redirected to the samtools headers and
           ;; -lpthread is appended after -lz.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* "makefile"
                 (("-I \\$\\{SAMTOOLS_DIR\\}")
                  (string-append "-I" (assoc-ref inputs "samtools")
                                 "/include/samtools"))
                 (("-lz ") "-lz -lpthread "))
               #t))
           ;; Copy the three listed tools into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (tool) (install-file tool bin))
                           '("j_count" "b_count" "sjcount"))
                 #t))))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
13150
;; minimap2, built from the upstream release tarball.
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/minimap2/releases/download/v"
             version "/minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "080w9066irkbhbyr4nmf19pzkdd2s4v31hpzlajgq2y0drr6zcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       ;; The second make argument depends on the system being built for:
       ;; "all" on x86_64, "arm_neon=1" on armhf/aarch64, and "sse2only=1"
       ;; everywhere else.
       #:make-flags
       (list "CC=gcc"
             (let ((system ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" system)
                 "all")
                ((or (string-prefix? "armhf" system)
                     (string-prefix? "aarch64" system))
                 "arm_neon=1")
                (else "sse2only=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable and its man page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (install-file "minimap2" bin)
               (mkdir-p man)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
13209
;; ciRcus R package, fetched from the upstream git tag.
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus.git")
             ;; The tag name is the version prefixed with "v".
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    ;; Typo fix: "featrues" -> "features".
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
13251
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         ;; Tell the makefile that the gclib sources sit in ./gclib, where
         ;; the 'copy-gclib-source' phase below puts them.
         #:make-flags
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; The gclib checkout is named after its plain version, so only
          ;; 'version' and 'commit' are bound here; the 'revision' binding
          ;; that used to sit in this 'let' was never referenced.
          ,(let ((version "0.10.3")
                 (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib.git")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
13312
(define-public find-circ
  ;; The last release was in 2015. The license was clarified in 2017, so we
  ;; take the latest commit.
  (let ((revision "1")
        (commit "8655dca54970fcf7e92e22fbf57e1188724dda7d"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Copy each script into bin and wrap it so that the build-time
           ;; value of PYTHONPATH is prefixed to PYTHONPATH at run time.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (path (getenv "PYTHONPATH")))
                 (for-each (lambda (script)
                             (install-file script bin)
                             (wrap-program (string-append bin "/" script)
                               (list "PYTHONPATH" ":" 'prefix (list path))))
                           '("cmp_bed.py"
                             "find_circ.py"
                             "maxlength.py"
                             "merge_bed.py"
                             "unmapped2anchors.py")))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
13364
;; Scanpy, fetched from PyPI.
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.4.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "14kh1ji70xxhmri5q8sgcibsidhr6f221wxrcw8a5xvibj5da17j"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run pytest directly after removing the test files that cannot
         ;; run in the build environment.
         (replace 'check
           (lambda* (#:key inputs #:allow-other-keys)
             ;; These tests require Internet access.
             (delete-file-recursively "scanpy/tests/notebooks")
             (delete-file "scanpy/tests/test_clustering.py")

             ;; TODO: I can't get the plotting tests to work, even with Xvfb.
             (delete-file "scanpy/tests/test_plotting.py")
             (delete-file "scanpy/tests/test_preprocessing.py")
             (delete-file "scanpy/tests/test_read_10x.py")

             ;; Put the current directory first on PYTHONPATH before
             ;; invoking pytest.
             (setenv "PYTHONPATH"
                     (string-append (getcwd) ":"
                                    (getenv "PYTHONPATH")))
             (invoke "pytest")
             #t)))))
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-h5py" ,python-h5py)
       ("python-igraph" ,python-igraph)
       ("python-joblib" ,python-joblib)
       ("python-louvain" ,python-louvain)
       ("python-legacy-api-wrap" ,python-legacy-api-wrap)
       ("python-matplotlib" ,python-matplotlib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-packaging" ,python-packaging)
       ("python-pandas" ,python-pandas)
       ("python-patsy" ,python-patsy)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-statsmodels" ,python-statsmodels)
       ("python-tables" ,python-tables)
       ("python-umap-learn" ,python-umap-learn)))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://github.com/theislab/scanpy")
    ;; A synopsis must not end with a period ('guix lint' flags it).
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
13427
;; BBKNN, fetched from PyPI.
(define-public python-bbknn
  (package
    (name "python-bbknn")
    (version "1.3.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bbknn" version))
       (sha256
        (base32
         "1jbsh01f57zj4bhvjr3jh4532zznqd6nccmgrl3qi9gnhkf7c4y0"))))
    (build-system python-build-system)
    ;; TODO: Enable the tests after migration to scikit-learn.
    (arguments
     `(#:tests? #f))
    (propagated-inputs
     `(("python-annoy" ,python-annoy)
       ("python-cython" ,python-cython)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-umap-learn" ,python-umap-learn)))
    (home-page "https://github.com/Teichlab/bbknn")
    (synopsis "Batch balanced KNN")
    (description "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow. It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches. BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups. This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
    (license license:expat)))
13461
(define-public gffcompare
  ;; Built from a pinned commit together with a matching gclib checkout.
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place the gclib sources in a sibling "gclib" directory before
           ;; building.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; This must be 'let*': with a plain 'let' the 'git-version' call
          ;; below would see the enclosing gffcompare 'commit' and
          ;; 'revision' rather than the gclib ones bound here, and the
          ;; checkout would be named after the wrong commit.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      ;; Typo fixes: "classifing" -> "classifying" in the synopsis and
      ;; "a annotation" -> "an annotation" in the description.
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
13524
(define-public intervaltree
  ;; Built from a pinned commit; the version string is derived from it.
  (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5"))
    (package
      (name "intervaltree")
      (version (git-version "0.0.0" "1" commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/intervaltree/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ; No tests.
         ;; Pass the output directory as PREFIX; DESTDIR is given as a pair
         ;; of literal double quotes.
         #:make-flags
         (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
               "DESTDIR=\"\"")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))       ; There is no configure phase.
      (home-page "https://github.com/ekg/intervaltree")
      (synopsis "Minimal C++ interval tree implementation")
      (description "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval. This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
      (license license:expat))))
13554
;; The "intervaltree" Python package, fetched from PyPI.
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "3.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "intervaltree" version))
       (sha256
        (base32
         "0wz234g6irlm4hivs2qzmnywk0ss06ckagwh15nflkyb3p462kyb"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; pytest appears to verify that the code under test is the
         ;; checked-out copy rather than an installed, possibly stale one.
         ;; That check is harmless here because the package was just
         ;; installed, so it is switched off to keep the tests from being
         ;; skipped entirely.
         (add-before 'check 'import-mismatch-error-workaround
           (lambda _
             (setenv "PY_IGNORE_IMPORTMISMATCH" "1")
             #t)))))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
13592
;; Pypairix, fetched from PyPI.
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pypairix" version))
       (sha256
        (base32
         "0zs92b74s5v4xy2h16s15f3z6l4nnbw8x8zyif7xx5xpafjn0xss"))))
    (build-system python-build-system)
    ;; FIXME: the tests are disabled because test.support cannot be loaded:
    ;; ImportError: cannot import name 'support'
    (arguments
     '(#:tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
13616
;; Pyfaidx, fetched from PyPI.
(define-public python-pyfaidx
  (package
    (name "python-pyfaidx")
    (version "0.5.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyfaidx" version))
       (sha256
        (base32
         "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
13637
;; Python 2 variant, derived from 'python-pyfaidx' with
;; 'package-with-python2'.
(define-public python2-pyfaidx (package-with-python2 python-pyfaidx))
13640
;; Cooler, fetched from PyPI.
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.8.7")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cooler" version))
       (sha256
        (base32
         "01g6gqix9ba27sappz6nfyiwabzrlf8i5fn8kwcz8ra356cq9crp"))))
    (build-system python-build-system)
    ;; Run-time dependencies, propagated to users of this package.
    (propagated-inputs
     `(("python-asciitree" ,python-asciitree)
       ("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)
       ("python-scipy" ,python-scipy)
       ("python-simplejson" ,python-simplejson)))
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-pytest" ,python-pytest)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
13679
;; HiCMatrix, fetched from the upstream git repository.
(define-public python-hicmatrix
  (package
    (name "python-hicmatrix")
    (version "12")
    (source
     (origin
       ;; Version 12 is not available on pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCMatrix.git")
             ;; The tag name is the bare version string.
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1xhdyx16f3brgxgxybixdi64ki8nbbkq5vk4h9ahi11pzpjfn1pj"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Rewrite the cooler 0.8.5 requirement to "cooler==0.8.*" in both
         ;; files that declare it.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* '("requirements.txt"
                            "setup.py")
               (("cooler *=+ *0.8.5")
                "cooler==0.8.*"))
             #t)))))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)
       ("python-intervaltree" ,python-intervaltree)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (home-page "https://github.com/deeptools/HiCMatrix/")
    (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
    (description
     "This helper package implements the @code{HiCMatrix} class for
the HiCExplorer and pyGenomeTracks packages.")
    (license license:gpl3+)))
13719
;; HiCExplorer, fetched from the upstream git repository.
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer.git")
             ;; The tag name is the bare version string.
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn every "==" in setup.py into ">=".
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    ;; Typo fix: "reordering or chromosomes" -> "reordering of chromosomes".
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
13772
;; PyGenomeTracks, fetched from PyPI.
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "3.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyGenomeTracks" version))
       (sha256
        (base32
         "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Replace the exact matplotlib pin in setup.py with a lower bound.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("matplotlib ==3.1.1")
                "matplotlib >=3.1.1"))
             #t)))))
    (propagated-inputs
     `(("python-future" ,python-future)
       ("python-gffutils" ,python-gffutils)
       ("python-hicmatrix" ,python-hicmatrix)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-tqdm" ,python-tqdm)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
13815
;; Hic2cool, fetched from PyPI.
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hic2cool" version))
       (sha256
        (base32
         "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
13839
;; The poRe R package, fetched from its SourceForge project.
(define-public r-pore
  (package
    (name "r-pore")
    (version "0.24")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rpore/" version
                           "/poRe_" version ".tar.gz"))
       (sha256
        (base32 "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    ;; Record the upstream spelling of the package name.
    (properties `((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bit64" ,r-bit64)
       ("r-data-table" ,r-data-table)
       ("r-rhdf5" ,r-rhdf5)
       ("r-shiny" ,r-shiny)
       ("r-svdialogs" ,r-svdialogs)))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; This is free software but the license variant is unclear:
    ;; <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
13868
(define-public r-xbioc
  ;; Built from a pinned git commit; the version string is derived from the
  ;; base version, the revision counter and the commit.
  (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.16" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-assertthat" ,r-assertthat)
         ("r-biobase" ,r-biobase)
         ("r-biocmanager" ,r-biocmanager)
         ("r-digest" ,r-digest)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-reshape2" ,r-reshape2)
         ("r-stringr" ,r-stringr)))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
13901
(define-public r-cssam
  ;; Built from a pinned git commit of the csSAM repository.
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-formula" ,r-formula)
         ("r-ggplot2" ,r-ggplot2)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Upstream permits any version of the license.
      (license license:lgpl2.1+))))
13932
(define-public r-bseqsc
  ;; Built from a pinned git commit.  Among its propagated inputs are
  ;; r-cssam and r-xbioc, which are packaged in this file as well.
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-abind" ,r-abind)
         ("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-cssam" ,r-cssam)
         ("r-dplyr" ,r-dplyr)
         ("r-e1071" ,r-e1071)
         ("r-edger" ,r-edger)
         ("r-ggplot2" ,r-ggplot2)
         ("r-nmf" ,r-nmf)
         ("r-openxlsx" ,r-openxlsx)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-preprocesscore" ,r-preprocesscore)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)
         ("r-stringr" ,r-stringr)
         ("r-xbioc" ,r-xbioc)))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
      (description "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
13977
(define-public porechop
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; https://github.com/rrwick/Porechop#installation, hence the pinned
  ;; commit instead of a release tarball.
  (let ((revision "1")
        (commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
14007
(define-public poretools
  ;; The most recent release dates from 2016 and the most recent commit from
  ;; 2017.  Upstream recommends installing from a clone of the git
  ;; repository, see
  ;; https://poretools.readthedocs.io/en/latest/content/installation.html
  (let ((revision "1")
        (commit "e426b1f09e86ac259a00c261c79df91510777407"))
    (package
      (name "poretools")
      (version (git-version "0.6.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/poretools.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
      (build-system python-build-system)
      ;; Poretools needs Python >= 2.7 and < 3.0; the Python dependencies
      ;; below are therefore the python2 variants as well.
      (arguments (list #:python python-2))
      (inputs
       `(("hdf5" ,hdf5)))
      (propagated-inputs
       `(("python-dateutil" ,python2-dateutil)
         ("python-h5py" ,python2-h5py)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-seaborn" ,python2-seaborn)))
      (home-page "https://poretools.readthedocs.io")
      (synopsis "Toolkit for working with nanopore sequencing data")
      (description
       "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
This @code{poretools} package is a flexible toolkit for exploring datasets
generated by nanopore sequencing devices for the purposes of quality control and
downstream analysis. Poretools operates directly on the native FAST5, a variant
of the Hierarchical Data Format (HDF5) standard.")
      (license license:expat))))
14046
(define-public r-absfiltergsea
  ;; CRAN package; the upstream name is spelled "AbsFilterGSEA".
  (package
    (name "r-absfiltergsea")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "AbsFilterGSEA" version))
       (sha256
        (base32 "15srxkxsvn38kd5frdrwfdf0ad8gskrd0h01wmdf9hglq8fjrp7w"))))
    (properties '((upstream-name . "AbsFilterGSEA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-deseq" ,r-deseq)
       ("r-limma" ,r-limma)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)))
    (home-page "https://cran.r-project.org/web/packages/AbsFilterGSEA/")
    (synopsis "Improved false positive control of gene-permuting with absolute filtering")
    (description
     "This package provides a function that performs gene-permuting of a gene-set
enrichment analysis (GSEA) calculation with or without the absolute filtering.
Without filtering, users can perform (original) two-tailed or one-tailed
absolute GSEA.")
    (license license:gpl2)))
14073
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM.git")
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Nothing is configured or compiled; the scripts are patched and
         ;; copied by the custom install phase below.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (helper-dir (string-append output "/libexec/jamm"))
                    (bin-dir (string-append output "/bin"))
                    ;; Scripts that end up in bin/.
                    (launchers '("JAMM.sh" "SignalGenerator.sh"))
                    ;; Scripts that end up in libexec/jamm.
                    (helpers '("bincalculator.r"
                               "peakfinder.r"
                               "peakhelper.r"
                               "signalmaker.r"
                               "xcorr.r"
                               "xcorrhelper.r"
                               ;; Perl scripts
                               "peakfilter.pl"
                               "readshifter.pl"))
                    ;; bin/ directories prepended to PATH by the wrappers.
                    (tool-dirs
                     (map (lambda (input)
                            (string-append (assoc-ref inputs input) "/bin"))
                          '("coreutils" "gawk" "perl" "r-minimal"))))
               ;; Make the launchers look for the helpers in libexec/jamm.
               (substitute* launchers
                 (("^sPath=.*")
                  (string-append "sPath=\"" helper-dir "\"\n")))
               (for-each (lambda (helper)
                           (install-file helper helper-dir))
                         helpers)
               ;; Install each launcher and wrap it so that the tools and
               ;; the Perl and R library paths of the build environment are
               ;; available at run time.
               (for-each
                (lambda (launcher)
                  (chmod launcher #o555)
                  (install-file launcher bin-dir)
                  (wrap-program (string-append bin-dir "/" launcher)
                    (list "PATH" ":" 'prefix tool-dirs)
                    (list "PERL5LIB" ":" 'prefix
                          (list (getenv "PERL5LIB")))
                    (list "R_LIBS_SITE" ":" 'prefix
                          (list (getenv "R_LIBS_SITE")))))
                launchers))
             #t)))))
    (inputs
     `(("bash" ,bash)
       ("coreutils" ,coreutils)
       ("gawk" ,gawk)
       ("perl" ,perl)
       ("r-minimal" ,r-minimal)
       ;;("r-parallel" ,r-parallel)
       ("r-signal" ,r-signal)
       ("r-mclust" ,r-mclust)))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
14147
(define-public ngless
  (package
    (name "ngless")
    (version "1.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/ngless/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
    (build-system haskell-build-system)
    (arguments
     `(#:haddock? #f ; The haddock phase fails with: NGLess/CmdArgs.hs:20:1:
                     ; error: parse error on input import
                     ; import Options.Applicative
       #:phases
       (modify-phases %standard-phases
         ;; Record the versions of the packaged tools in the Makefile, then
         ;; let make generate Versions.hs from them.
         (add-after 'unpack 'create-Versions.hs
           (lambda _
             (substitute* "Makefile"
               (("BWA_VERSION = .*")
                ,(string-append "BWA_VERSION = "
                                (package-version bwa) "\n"))
               (("SAM_VERSION = .*")
                ,(string-append "SAM_VERSION = "
                                (package-version samtools) "\n"))
               (("PRODIGAL_VERSION = .*")
                ,(string-append "PRODIGAL_VERSION = "
                                (package-version prodigal) "\n"))
               (("MINIMAP2_VERSION = .*")
                ,(string-append "MINIMAP2_VERSION = "
                                (package-version minimap2) "\n")))
             (invoke "make" "NGLess/Dependencies/Versions.hs")
             #t))
         (add-after 'create-Versions.hs 'create-cabal-file
           (lambda _
             (invoke "hpack")
             #t))
         ;; These tools are expected to be installed alongside ngless, under
         ;; the name "ngless-VERSION-TOOL".
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each
                (lambda (tool)
                  (symlink (string-append (assoc-ref inputs tool)
                                          "/bin/" tool)
                           (string-append bin "ngless-" ,version
                                          "-" tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))
               #t))))))
    (inputs
     `(("prodigal" ,prodigal)
       ("bwa" ,bwa)
       ("samtools" ,samtools)
       ("minimap2" ,minimap2)
       ("ghc-aeson" ,ghc-aeson)
       ("ghc-ansi-terminal" ,ghc-ansi-terminal)
       ("ghc-async" ,ghc-async)
       ("ghc-atomic-write" ,ghc-atomic-write)
       ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
       ("ghc-conduit" ,ghc-conduit)
       ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
       ("ghc-conduit-extra" ,ghc-conduit-extra)
       ("ghc-configurator" ,ghc-configurator)
       ("ghc-convertible" ,ghc-convertible)
       ("ghc-data-default" ,ghc-data-default)
       ("ghc-diagrams-core" ,ghc-diagrams-core)
       ("ghc-diagrams-lib" ,ghc-diagrams-lib)
       ("ghc-diagrams-svg" ,ghc-diagrams-svg)
       ("ghc-double-conversion" ,ghc-double-conversion)
       ("ghc-edit-distance" ,ghc-edit-distance)
       ("ghc-either" ,ghc-either)
       ("ghc-errors" ,ghc-errors)
       ("ghc-extra" ,ghc-extra)
       ("ghc-filemanip" ,ghc-filemanip)
       ("ghc-file-embed" ,ghc-file-embed)
       ("ghc-gitrev" ,ghc-gitrev)
       ("ghc-hashtables" ,ghc-hashtables)
       ("ghc-http-conduit" ,ghc-http-conduit)
       ("ghc-inline-c" ,ghc-inline-c)
       ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
       ("ghc-intervalmap" ,ghc-intervalmap)
       ("ghc-missingh" ,ghc-missingh)
       ("ghc-optparse-applicative" ,ghc-optparse-applicative)
       ("ghc-regex" ,ghc-regex)
       ("ghc-safe" ,ghc-safe)
       ("ghc-safeio" ,ghc-safeio)
       ("ghc-strict" ,ghc-strict)
       ("ghc-tar" ,ghc-tar)
       ("ghc-tar-conduit" ,ghc-tar-conduit)
       ("ghc-unliftio" ,ghc-unliftio)
       ("ghc-unliftio-core" ,ghc-unliftio-core)
       ("ghc-vector" ,ghc-vector)
       ("ghc-yaml" ,ghc-yaml)
       ("ghc-zlib" ,ghc-zlib)))
    ;; NOTE(review): nothing else in this definition refers to R, yet these
    ;; R packages are propagated -- confirm that they are really needed.
    (propagated-inputs
     `(("r-r6" ,r-r6)
       ("r-hdf5r" ,r-hdf5r)
       ("r-iterators" ,r-iterators)
       ("r-itertools" ,r-itertools)
       ("r-matrix" ,r-matrix)))
    (native-inputs
     `(("ghc-hpack" ,ghc-hpack)
       ("ghc-quickcheck" ,ghc-quickcheck)
       ("ghc-test-framework" ,ghc-test-framework)
       ("ghc-test-framework-hunit" ,ghc-test-framework-hunit)
       ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
       ("ghc-test-framework-th" ,ghc-test-framework-th)))
    (home-page "https://gitlab.com/ngless/ngless")
    (synopsis "DSL for processing next-generation sequencing data")
    (description "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
14268
(define-public filtlong
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; https://github.com/rrwick/Filtlong#installation, and the latest release
  ;; is more than nine months old.
  (let ((revision "1")
        (commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the binary to bin/ and the two helper scripts to
           ;; share/filtlong/scripts.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((output (assoc-ref outputs "out"))
                      (bin-dir (string-append output "/bin"))
                      (script-dir
                       (string-append output "/share/filtlong/scripts")))
                 (install-file "bin/filtlong" bin-dir)
                 (for-each (lambda (script)
                             (install-file script script-dir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))
               #t))
           ;; Let the installed histogram.py see the PYTHONPATH of the build
           ;; environment.
           (add-after 'install 'wrap-program
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((histogram
                      (string-append (assoc-ref outputs "out")
                                     "/share/filtlong/scripts/histogram.py"))
                     (python-path (getenv "PYTHONPATH")))
                 (wrap-program histogram
                   (list "PYTHONPATH" ":" 'prefix (list python-path))))
               #t))
           ;; Replace "awk" with the absolute file name of gawk in the copy
           ;; of the script that lives in the source tree.
           (add-before 'check 'patch-tests
             (lambda _
               (substitute* "scripts/read_info_histograms.sh"
                 (("awk") (which "gawk")))
               #t)))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0))))) ;histogram.py
14328
(define-public nanopolish
  ;; The recommended way to install is to clone the git repository
  ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  ;; Also, the differences between release and current version seem to be
  ;; significant.
  (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.11.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish.git")
               (commit commit)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
         (modules '((guix build utils)))
         ;; Remove the htslib copy from the checkout; the packaged htslib
         ;; is listed among the inputs instead.
         (snippet
          '(begin
             (delete-file-recursively "htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Put the eigen3 header directory on the compiler's search path.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "CPATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3:"
                                      (or (getenv "CPATH") "")))
               #t))
           (delete 'configure)
           ;; Install the binary to bin/ and every file found below
           ;; "scripts" to share/nanopolish/scripts.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (string-append out "/share/nanopolish/scripts")))

                 (install-file "nanopolish" bin)
                 (for-each (lambda (file) (install-file file scripts))
                           (find-files "scripts" ".*"))
                 #t)))
           ;; Wrap the installed Python and Perl scripts so that they see
           ;; the library search paths of the build environment.  The
           ;; scripts are looked up below the package output (an absolute
           ;; "/share/..." directory would never match), and the values
           ;; come from the environment.
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((scripts (string-append (assoc-ref outputs "out")
                                             "/share/nanopolish/scripts")))
                 ;; Wrap every script whose name matches PATTERN so that
                 ;; VARIABLE is prefixed with its build-time value.  Do
                 ;; nothing when VARIABLE is unset.
                 (define (wrap-scripts variable pattern)
                   (let ((value (getenv variable)))
                     (when value
                       (for-each (lambda (file)
                                   (wrap-program file
                                     (list variable ":" 'prefix
                                           (list value))))
                                 (find-files scripts pattern)))))
                 (wrap-scripts "PYTHONPATH" "\\.py")
                 (wrap-scripts "PERL5LIB" "\\.pl")
                 #t))))))
      (inputs
       `(("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("python" ,python-wrapper)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))
14408
(define-public cnvkit
  ;; Fetched from the upstream git repository at the tag "v" + version.
  (package
    (name "cnvkit")
    (version "0.9.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/etal/cnvkit.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-future" ,python-future)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-reportlab" ,python-reportlab)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-scipy" ,python-scipy)
       ;; R packages
       ("r-dnacopy" ,r-dnacopy)))
    (home-page "https://cnvkit.readthedocs.org/")
    (synopsis "Copy number variant detection from targeted DNA sequencing")
    (description
     "CNVkit is a Python library and command-line software toolkit to infer
and visualize copy number from high-throughput DNA sequencing data. It is
designed for use with hybrid capture, including both whole-exome and custom
target panels, and short-read sequencing platforms such as Illumina and Ion
Torrent.")
    (license license:asl2.0)))
14444
(define-public python-pyfit-sne
  ;; Fetched from the upstream git repository; the tag is the bare version
  ;; number.
  (package
    (name "python-pyfit-sne")
    (version "1.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/pyFIt-SNE.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13wh3qkzs56azmmgnxib6xfr29g7xh09sxylzjpni5j0pp0rc5qw"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("fftw" ,fftw)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/KlugerLab/pyFIt-SNE")
    (synopsis "FFT-accelerated Interpolation-based t-SNE")
    (description
     "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This package
is a Cython wrapper for FIt-SNE.")
    (license license:bsd-4)))
14474
(define-public bbmap
  (package
    (name "bbmap")
    (version "35.82")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
       (sha256
        (base32 "1q4rfhxcb6z3gm8zg2davjz98w22lkf4hm9ikxz9kdl93pil3wkd"))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "dist"
       #:tests? #f                      ; there are none
       ;; Tell ant where the MPI jar of the java-openmpi input lives.
       #:make-flags
       (list (string-append "-Dmpijar="
                            (assoc-ref %build-inputs "java-openmpi")
                            "/lib/mpi.jar"))
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils))
       #:phases
       (modify-phases %standard-phases
         ;; The JNI library has its own makefile in the "jni" directory.
         (add-after 'build 'build-jni-library
           (lambda _
             (with-directory-excursion "jni"
               (invoke "make" "-f" "makefile.linux"))))
         ;; There is no install target
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-scripts-and-documentation
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Use the absolute file name of awk in calcmem.sh.
             (substitute* "calcmem.sh"
               (("\\| awk ") (string-append "| " (which "awk") " ")))
             (let* ((scripts (find-files "." "\\.sh$"))
                    (out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bbmap"))
                    (jni (string-append out "/lib/jni")))
               ;; Point the shell scripts at the installed documentation,
               ;; jar and JNI directory, and at the absolute file name of
               ;; java.
               (substitute* scripts
                 (("\\$DIR\"\"docs") doc)
                 (("^CP=.*")
                  (string-append "CP=" out "/share/java/BBTools.jar\n"))
                 (("^NATIVELIBDIR.*")
                  (string-append "NATIVELIBDIR=" jni "\n"))
                 (("CMD=\"java")
                  (string-append "CMD=\"" (which "java"))))
               (for-each (lambda (script) (install-file script bin)) scripts)

               ;; Install JNI library
               (install-file "jni/libbbtoolsjni.so" jni)

               ;; Install documentation
               (install-file "docs/readme.txt" doc)
               (copy-recursively "docs/guides" doc))
             #t)))
       #:jdk ,openjdk11))
    (inputs
     `(("gawk" ,gawk)
       ("java-eclipse-jdt-core" ,java-eclipse-jdt-core)
       ("java-eclipse-jdt-compiler-apt" ,java-eclipse-jdt-compiler-apt)
       ("java-openmpi" ,java-openmpi)))
    ;; Use the HTTPS address of the project page rather than plain HTTP.
    (home-page "https://sourceforge.net/projects/bbmap/")
    (synopsis "Aligner and other tools for short sequencing reads")
    (description
     "This package provides bioinformatic tools to align, deduplicate,
reformat, filter and normalize DNA and RNA-seq data. It includes the
following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
a kmer-based error-correction and normalization tool; Dedupe, a tool to
simplify assemblies by removing duplicate or contained subsequences that share
a target percent identity; Reformat, to convert reads between
fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
to an artifact/contaminant file.")
    (license license:bsd-3)))
14550
(define-public velvet
  (package
    (name "velvet")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
                           "velvet_" version ".tgz"))
       (sha256
        (base32 "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
       (modules '((guix build utils)))
       ;; Drop the shipped PDF manual and the bundled third-party
       ;; libraries from the source.
       (snippet
        '(begin
           (delete-file "Manual.pdf")
           (delete-file-recursively "third-party")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "OPENMP=t")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The bundled zlib is gone, so include the header by its plain
         ;; name instead of by its path under third-party/.
         (add-after 'unpack 'fix-zlib-include
           (lambda _
             (substitute* "src/binarySequences.c"
               (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))
             #t))
         ;; Install the two executables and the two PDF manuals by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (bin-dir (string-append output "/bin"))
                    (doc-dir (string-append output "/share/doc/velvet")))
               (mkdir-p bin-dir)
               (mkdir-p doc-dir)
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         '("velveth" "velvetg"))
               (for-each (lambda (manual)
                           (install-file manual doc-dir))
                         '("Manual.pdf" "Columbus_manual.pdf"))
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-latex-graphics
                                        texlive-latex-hyperref)))))
    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
    (synopsis "Nucleic acid sequence assembler for very short reads")
    (description
     "Velvet is a de novo genomic assembler specially designed for short read
sequencing technologies, such as Solexa or 454. Velvet currently takes in
short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
    (license license:gpl2+)))
14608
(define-public python-velocyto
  ;; Fetched from PyPI under the upstream name "velocyto".
  (package
    (name "python-velocyto")
    (version "0.17.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "velocyto" version))
       (sha256
        (base32 "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-joblib" ,python-joblib)))
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-cython" ,python-cython)
       ("python-h5py" ,python-h5py)
       ("python-loompy" ,python-loompy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/velocyto-team/velocyto.py")
    (synopsis "RNA velocity analysis for single cell RNA-seq data")
    (description
     "Velocyto is a library for the analysis of RNA velocity. Velocyto
includes a command line tool and an analysis pipeline.")
    (license license:bsd-2)))
14641
(define-public arriba
  (package
    (name "arriba")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/suhrig/arriba/releases/"
                           "download/v" version "/arriba_v" version ".tar.gz"))
       (sha256
        (base32
         "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Instead of running a configure script, patch the Makefile and
         ;; the driver script.
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Build against the htslib input: use its headers and link
             ;; to its shared library rather than to a static libhts.a.
             (let ((htslib (assoc-ref inputs "htslib")))
               (substitute* "Makefile"
                 (("-I\\$\\(HTSLIB\\)/htslib")
                  (string-append "-I" htslib "/include/htslib"))
                 ((" \\$\\(HTSLIB\\)/libhts.a")
                  (string-append " " htslib "/lib/libhts.so"))))
             ;; Refer to STAR and samtools by absolute file name.
             (substitute* "run_arriba.sh"
               (("^STAR ") (string-append (which "STAR") " "))
               (("samtools --version-only")
                (string-append (which "samtools") " --version-only"))
               (("samtools index")
                (string-append (which "samtools") " index"))
               (("samtools sort")
                (string-append (which "samtools") " sort")))
             #t))
         ;; Install the binary and the two scripts to bin/, and wrap the R
         ;; script so that it sees the R_LIBS_SITE of the build
         ;; environment.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "arriba" bin)
               (install-file "run_arriba.sh" bin)
               (install-file "draw_fusions.R" bin)
               (wrap-program (string-append bin "/draw_fusions.R")
                 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("r-minimal" ,r-minimal)
       ("r-circlize" ,r-circlize)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("samtools" ,samtools)
       ("star" ,star)
       ("zlib" ,zlib)))
    (home-page "https://github.com/suhrig/arriba")
    ;; No trailing whitespace in the synopsis.
    (synopsis "Gene fusion detection from RNA-Seq data")
    (description
     "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
    ;; All code is under the Expat license with the exception of
    ;; "draw_fusions.R", which is under GPLv3.
    (license (list license:expat license:gpl3))))
14707
;; AdapterRemoval: adapter trimming for sequencing reads.  Built straight
;; from the upstream git tag "v<version>" using make flags only.
(define-public adapterremoval
  (package
    (name "adapterremoval")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/MikkelSchubert/adapterremoval.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ;; Install below the package output.
       (let ((out (assoc-ref %outputs "out")))
         (list "COLOR_BUILD=no"
               (string-append "PREFIX=" out)))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is skipped; everything is passed to make.
         (delete 'configure))))
    (inputs
     (list (list "zlib" zlib)))
    (home-page "https://adapterremoval.readthedocs.io/")
    (synopsis "Rapid sequence adapter trimming, identification, and read merging")
    (description
     "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
    (license license:gpl3+)))
14744
;; pplacer: OCaml programs for phylogenetic placement.  The build patches
;; "myocamlbuild.ml" and the Makefile, and swaps the bundled cddlib C files
;; in "cdd_src" for the ones from the cddlib package source.
(define-public pplacer
  (let ((commit "807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/matsen/pplacer.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))         ; for 'scandir'
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'fix-build-with-latest-ocaml
             (lambda _
               (substitute* "myocamlbuild.ml"
                 ;; Insert a compile flag line before the "c_pam" dep rule.
                 (("dep \\[\"c_pam\"\\]" matched)
                  (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
                                 matched))
                 ;; Define string helpers ahead of 'run_and_read'.
                 (("let run_and_read" matched)
                  (string-append "
let split s ch =
  let x = ref [] in
  let rec go s =
    let pos = String.index s ch in
    x := (String.before s pos)::!x;
    go (String.after s (pos + 1))
  in
  try go s
  with Not_found -> !x
let split_nl s = split s '\\n'
let before_space s =
  try String.before s (String.index s ' ')
  with Not_found -> s

" matched))
                 ;; Post-process the 'ocamlfind list' output with the
                 ;; helpers defined above.
                 (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" matched)
                  (string-append "List.map before_space (split_nl & " matched ")"))
                 ((" blank_sep_strings &") "")
                 ((" Lexing.from_string &") ""))
               #t))
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((tarball (assoc-ref inputs "cddlib-src"))
                     (scratch "cddlib_guix"))
                 ;; Unpack the cddlib source into a scratch directory.
                 (mkdir scratch)
                 (with-directory-excursion scratch
                   (invoke "tar" "xvf" tarball))
                 ;; The third 'scandir' entry is taken as the unpacked
                 ;; top-level directory (after "." and "..").
                 (let ((lib-src (string-append scratch "/"
                                               (list-ref (scandir scratch) 2)
                                               "/lib-src")))
                   (for-each make-file-writable (find-files "cdd_src" ".*"))
                   ;; Copy every matching file over the bundled copy.
                   (for-each
                    (lambda (c-file)
                      (copy-file c-file
                                 (string-append "cdd_src/" (basename c-file))))
                    (find-files lib-src ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Install the whole "bin" directory into the output.
               (copy-recursively "bin"
                                 (string-append (assoc-ref outputs "out")
                                                "/bin"))
               #t)))
         #:ocaml ,ocaml-4.07
         #:findlib ,ocaml4.07-findlib))
      (inputs
       (list (list "zlib" zlib "static")
             (list "gsl" gsl)
             (list "ocaml-ounit" (package-with-ocaml4.07 ocaml-ounit))
             (list "ocaml-batteries" (package-with-ocaml4.07 ocaml-batteries))
             (list "ocaml-camlzip" (package-with-ocaml4.07 camlzip))
             (list "ocaml-csv" (package-with-ocaml4.07 ocaml-csv))
             (list "ocaml-sqlite3" ocaml4.07-sqlite3)
             (list "ocaml-xmlm" (package-with-ocaml4.07 ocaml-xmlm))
             (list "ocaml-mcl" (package-with-ocaml4.07 ocaml-mcl))
             (list "ocaml-gsl" ocaml4.07-gsl-1)))
      (native-inputs
       (list (list "cddlib-src" (package-source cddlib))
             (list "ocamlbuild" (package-with-ocaml4.07 ocamlbuild))
             (list "pkg-config" pkg-config)))
      (propagated-inputs
       (list (list "pplacer-scripts" pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "https://matsen.fhcrc.org/pplacer/")
      (license license:gpl3))))
14860
14861 ;; This package is installed alongside 'pplacer'. It is a separate package so
14862 ;; that it can use the python-build-system for the scripts that are
14863 ;; distributed alongside the main OCaml binaries.
;; Python 2 helper scripts from the "scripts" directory of the pplacer
;; source.  Inherits source and metadata from 'pplacer'; the installed
;; scripts are wrapped so they find the hmmer and infernal programs.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts") #t))
         (replace 'check
           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; refpkg_align.py gets both hmmer and infernal on PATH.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin:"
                            (assoc-ref inputs "infernal") "/bin")))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               ;; hrefpkg_query.py only gets hmmer.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin")))
                 (wrap-program (string-append bin "/hrefpkg_query.py")
                   `("PATH" ":" prefix (,path)))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
14899
;; CheckM, fetched from PyPI and built with Python 2.
(define-public python2-checkm-genome
  (package
    (name "python2-checkm-genome")
    (version "1.0.13")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "checkm-genome" version))
       (sha256
        (base32
         "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
    (build-system python-build-system)
    (arguments
     (list #:python python-2
           ;; Some tests are interactive.
           #:tests? #f))
    (propagated-inputs
     (list (list "python-dendropy" python2-dendropy)
           (list "python-matplotlib" python2-matplotlib)
           (list "python-numpy" python2-numpy)
           (list "python-pysam" python2-pysam)
           (list "python-scipy" python2-scipy)))
    (home-page "https://pypi.org/project/Checkm/")
    (synopsis "Assess the quality of putative genome bins")
    (description
     "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
    (license license:gpl3+)))
14935
;; UMI-tools, fetched from PyPI (the PyPI project name is "umi_tools").
(define-public umi-tools
  (package
    (name "umi-tools")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "umi_tools" version))
       (sha256
        (base32
         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
    (build-system python-build-system)
    (inputs
     `(("python-pandas" ,python-pandas)
       ("python-future" ,python-future)
       ("python-scipy" ,python-scipy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-regex" ,python-regex)
       ("python-pysam" ,python-pysam)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/CGATOxford/UMI-tools")
    ;; "molecular", matching the expansion of UMI used in the description.
    (synopsis "Tools for analyzing unique molecular identifiers")
    ;; Every command name is marked up with @code{...}; a bare "@{" is not
    ;; valid Texinfo.
    (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
    (license license:expat)))
14968
;; ataqv: ATAC-seq quality control toolkit, built from the upstream git tag
;; named after the version.
(define-public ataqv
  (package
    (name "ataqv")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ParkerLab/ataqv.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ;; Tell the Makefile where to install and where boost and htslib
       ;; live.
       (let ((out (assoc-ref %outputs "out"))
             (boost (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "prefix=" out)
               (string-append "BOOST_ROOT=" boost)
               (string-append "HTSLIB_ROOT=" htslib)))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is skipped; everything is passed to make.
         (delete 'configure))))
    (inputs
     (list (list "boost" boost)
           (list "htslib" htslib)
           (list "ncurses" ncurses)
           (list "zlib" zlib)))
    (native-inputs
     (list (list "lcov" lcov)))
    (home-page "https://github.com/ParkerLab/ataqv")
    (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
    (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
    (license license:gpl3+)))
15009
;; PSIplot R package, built from the upstream git tag "v<version>".
(define-public r-psiplot
  (package
    (name "r-psiplot")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kcha/psiplot.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
    (build-system r-build-system)
    ;; R packages made available to users of this package.
    (propagated-inputs
     (list (list "r-mass" r-mass)
           (list "r-dplyr" r-dplyr)
           (list "r-tidyr" r-tidyr)
           (list "r-purrr" r-purrr)
           (list "r-readr" r-readr)
           (list "r-magrittr" r-magrittr)
           (list "r-ggplot2" r-ggplot2)))
    (home-page "https://github.com/kcha/psiplot")
    (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
    (description
     "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
    (license license:expat)))
15040
;; ont_fast5_api, built from the upstream git tag "release_<version>".
(define-public python-ont-fast5-api
  (package
    (name "python-ont-fast5-api")
    (version "1.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nanoporetech/ont_fast5_api.git")
             (commit (string-append "release_" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "03cbq4zbbwhll8ml2m9k8sa31mirsvcbjkrq1yna0kkzz9fad5fm"))))
    (build-system python-build-system)
    ;; Python libraries made available to users of this package.
    (propagated-inputs
     (list (list "python-numpy" python-numpy)
           (list "python-six" python-six)
           (list "python-h5py" python-h5py)
           (list "python-progressbar33" python-progressbar33)))
    (home-page "https://github.com/nanoporetech/ont_fast5_api")
    (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
    (description
     "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
    (license license:mpl2.0)))
15069
;; tbsp, pinned to a specific upstream commit; the package version is
;; derived from "1.0.0", the revision and that commit.
(define-public tbsp
  (let ((revision "1")
        (commit "ec8fff4410cfb13a677dbbb95cbbc60217e64907"))
    (package
      (name "tbsp")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/phoenixding/tbsp.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "025ym14x8gbd6hb55lsinqj6f5qzw36i10klgs7ldzxxd7s39ki1"))))
      (build-system python-build-system)
      ;; No tests included.
      (arguments (list #:tests? #f))
      (inputs
       (list (list "python-matplotlib" python-matplotlib)
             (list "python-networkx" python-networkx)
             (list "python-numpy" python-numpy)
             (list "python-pybigwig" python-pybigwig)
             (list "python-biopython" python-biopython)
             (list "python-scikit-learn" python-scikit-learn)
             (list "python-scipy" python-scipy)))
      (home-page "https://github.com/phoenixding/tbsp/")
      (synopsis "SNP-based trajectory inference")
      (description
       "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
      (license license:expat))))
15107
;; tabixpp: C++ wrapper around tabix.  The "htslib" directory in the source
;; tree is removed by the snippet and the htslib input is used instead.
(define-public tabixpp
  (package
    (name "tabixpp")
    (version "1.1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/tabixpp")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  (delete-file-recursively "htslib") #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         ;; The build phase needs overriding the location of htslib.
         (replace 'build
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-ref (assoc-ref inputs "htslib")))
               (invoke "make"
                       (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
                       (string-append "INCLUDES= -I" htslib-ref "/include/htslib")
                       "HTS_HEADERS=" ; No need to check for headers here.
                       ;; The library search path must name the directory
                       ;; holding libhts, i.e. "lib", not "include".
                       (string-append "LIBPATH=-L. -L" htslib-ref "/lib")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "tabix++" bin))
             #t)))))
    (home-page "https://github.com/ekg/tabixpp")
    (synopsis "C++ wrapper around tabix project")
    (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
    (license license:expat)))
15152
;; smithwaterman, pinned to an upstream commit.  Both the static library
;; "libsw.a" and the "smithwaterman" program are built and installed.
(define-public smithwaterman
  (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
    (package
      (name "smithwaterman")
      (version (git-version "0.0.0" "2" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/smithwaterman/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests to run.
         ;; Make targets to build.
         #:make-flags (list "libsw.a" "all")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((prefix (assoc-ref outputs "out")))
                 ;; Program goes to bin/, static library to lib/.
                 (install-file "smithwaterman"
                               (string-append prefix "/bin"))
                 (install-file "libsw.a"
                               (string-append prefix "/lib")))
               #t)))))
      (home-page "https://github.com/ekg/smithwaterman")
      (synopsis "Implementation of the Smith-Waterman algorithm")
      (description "Implementation of the Smith-Waterman algorithm.")
      ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
      (license (list license:gpl2 license:expat)))))
15186
;; multichoose, built from the upstream git tag "v<version>".  Only the two
;; command-line programs are installed.
(define-public multichoose
  (package
    (name "multichoose")
    (version "1.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/multichoose/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; Tests require node.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               ;; TODO: There are Python modules for these programs too.
               (install-file "multichoose" bindir)
               (install-file "multipermute" bindir))
             #t)))))
    (home-page "https://github.com/ekg/multichoose")
    (synopsis "Efficient loopless multiset combination generation algorithm")
    (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
    (license license:expat)))
15220
;; fsom, pinned to an upstream commit; only the "fsom" program is installed.
(define-public fsom
  (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
    (package
      (name "fsom")
      (version (git-version "0.0.0" "1" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/fsom/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin")))
                 (install-file "fsom" bindir))
               #t)))))
      (home-page "https://github.com/ekg/fsom")
      (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
      (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
      (license license:gpl3))))
15250
;; fastahack, built from the upstream git tag "v<version>"; only the
;; "fastahack" program is installed.
(define-public fastahack
  (package
    (name "fastahack")
    (version "1.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/fastahack/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
    (build-system gnu-build-system)
    (arguments
     `(;; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (install-file "fastahack" bindir))
             #t)))))
    (home-page "https://github.com/ekg/fastahack")
    (synopsis "Indexing and sequence extraction from FASTA files")
    (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
    (license (list license:expat license:gpl2))))
15282
;; vcflib: VCF library and tools.  The bundled submodule directories are
;; deleted by the source snippet and re-created at build time from the
;; sources of the corresponding packages; htslib comes from the inputs.
(define-public vcflib
  (package
    (name "vcflib")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/vcflib/vcflib/releases/"
                           "download/v" version
                           "/vcflib-" version "-src.tar.gz"))
       (sha256
        (base32 "14zzrg8hg8cq9cvq2wdvp21j7nmxxkjrbagw2apd2yqv2kyx42lm"))
       (modules '((guix build utils)))
       (snippet
        `(begin
           (for-each delete-file-recursively
                     '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
                       "libVCFH" "multichoose" "smithwaterman" "tabixpp"))
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (native-inputs
     `(;; Submodules.
       ;; This package builds against the .o files so we need to extract the source.
       ("fastahack-src" ,(package-source fastahack))
       ("filevercmp-src" ,(package-source filevercmp))
       ("fsom-src" ,(package-source fsom))
       ("intervaltree-src" ,(package-source intervaltree))
       ("multichoose-src" ,(package-source multichoose))
       ("smithwaterman-src" ,(package-source smithwaterman))
       ("tabixpp-src" ,(package-source tabixpp))))
    (arguments
     `(#:tests? #f ; no tests
       #:make-flags (list (string-append "HTS_LIB="
                                         (assoc-ref %build-inputs "htslib")
                                         "/lib/libhts.a")
                          (string-append "HTS_INCLUDES= -I"
                                         (assoc-ref %build-inputs "htslib")
                                         "/include/htslib")
                          ;; -L must name the directory that contains
                          ;; libhts, which is "lib", not the header
                          ;; directory.
                          (string-append "HTS_LDFLAGS= -L"
                                         (assoc-ref %build-inputs "htslib")
                                         "/lib" " -lhts"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'check)
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             ;; UNPACK creates TARGET and fills it from the input named
             ;; SOURCE, which is either a directory (copied) or a tarball
             ;; (extracted with its top-level directory stripped).
             (let ((unpack (lambda (source target)
                             (mkdir target)
                             (with-directory-excursion target
                               (if (file-is-directory? (assoc-ref inputs source))
                                   (copy-recursively (assoc-ref inputs source) ".")
                                   (invoke "tar" "xvf"
                                           (assoc-ref inputs source)
                                           "--strip-components=1"))))))
               (and
                (unpack "fastahack-src" "fastahack")
                (unpack "filevercmp-src" "filevercmp")
                (unpack "fsom-src" "fsom")
                (unpack "intervaltree-src" "intervaltree")
                (unpack "multichoose-src" "multichoose")
                (unpack "smithwaterman-src" "smithwaterman")
                (unpack "tabixpp-src" "tabixpp")))))
         (replace 'build
           (lambda* (#:key inputs make-flags #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               ;; Build tabixpp first, against the htslib input rather than
               ;; the relative "htslib" directory its Makefile refers to.
               (with-directory-excursion "tabixpp"
                 (substitute* "Makefile"
                   (("-Ihtslib") (string-append "-I" htslib "/include/htslib"))
                   ;; Libraries are in "lib" directly.
                   (("-Lhtslib") (string-append "-L" htslib "/lib"))
                   (("htslib/htslib") (string-append htslib "/include/htslib")))
                 (invoke "make"
                         (string-append "HTS_LIB=" htslib "/lib/libhts.a")))
               (apply invoke "make" "CC=gcc" "CFLAGS=-Itabixpp" make-flags))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (lib (string-append (assoc-ref outputs "out") "/lib")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files "bin" ".*"))
               ;; The header files in src/ do not interface libvcflib,
               ;; therefore they are left out.
               (install-file "libvcflib.a" lib))
             #t)))))
    (home-page "https://github.com/vcflib/vcflib/")
    (synopsis "Library for parsing and manipulating VCF files")
    (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
    (license license:expat)))
15381
;; FreeBayes, pinned to an upstream commit.  vcflib and its submodules, as
;; well as the two test helper repositories, are unpacked into the source
;; tree from package sources before building.
(define-public freebayes
  (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")
        (revision "1")
        (version "1.0.2"))
    (package
      (name "freebayes")
      (version (git-version version revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/freebayes.git")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2"))))
      (build-system gnu-build-system)
      (inputs
       `(("bamtools" ,bamtools)
         ("htslib" ,htslib)
         ("zlib" ,zlib)))
      (native-inputs
       `(("bc" ,bc)                     ; Needed for running tests.
         ("samtools" ,samtools)         ; Needed for running tests.
         ("parallel" ,parallel)         ; Needed for running tests.
         ("perl" ,perl)                 ; Needed for running tests.
         ("procps" ,procps)             ; Needed for running tests.
         ("python" ,python-2)           ; Needed for running tests.
         ("vcflib-src" ,(package-source vcflib))
         ;; These are submodules for the vcflib version used in freebayes.
         ;; This package builds against the .o files so we need to extract the source.
         ("tabixpp-src" ,(package-source tabixpp))
         ("smithwaterman-src" ,(package-source smithwaterman))
         ("multichoose-src" ,(package-source multichoose))
         ("fsom-src" ,(package-source fsom))
         ("filevercmp-src" ,(package-source filevercmp))
         ("fastahack-src" ,(package-source fastahack))
         ("intervaltree-src" ,(package-source intervaltree))
         ;; These submodules are needed to run the tests.
         ("bash-tap-src" ,(package-source bash-tap))
         ("test-simple-bash-src"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ingydotnet/test-simple-bash/")
                   (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
             (file-name "test-simple-bash-src-checkout")
             (sha256
              (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
      (arguments
       `(#:make-flags
         (list "CC=gcc"
               (string-append "BAMTOOLS_ROOT="
                              (assoc-ref %build-inputs "bamtools")))
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'fix-tests
             (lambda _
               ;; Replace the 'grep -P' invocation with an equivalent
               ;; 'grep -E' one.  The tab that the original pattern matches
               ;; via "\t" is written as an escape here so that it cannot be
               ;; lost to whitespace normalization.
               (substitute* "test/t/01_call_variants.t"
                 (("grep -P \"\\(\\\\t500\\$\\|\\\\t11000\\$\\|\\\\t1000\\$\\)\"")
                  "grep -E '\t(500|11000|1000)$'"))
               #t))
           (add-after 'unpack 'unpack-submodule-sources
             (lambda* (#:key inputs #:allow-other-keys)
               ;; UNPACK fills the existing directory TARGET from the input
               ;; named SOURCE, which is either a directory (copied) or a
               ;; tarball (extracted with its top-level directory stripped).
               (let ((unpack (lambda (source target)
                               (with-directory-excursion target
                                 (if (file-is-directory? (assoc-ref inputs source))
                                     (copy-recursively (assoc-ref inputs source) ".")
                                     (invoke "tar" "xvf"
                                             (assoc-ref inputs source)
                                             "--strip-components=1"))))))
                 (and
                  (unpack "vcflib-src" "vcflib")
                  (unpack "fastahack-src" "vcflib/fastahack")
                  (unpack "filevercmp-src" "vcflib/filevercmp")
                  (unpack "fsom-src" "vcflib/fsom")
                  (unpack "intervaltree-src" "vcflib/intervaltree")
                  (unpack "multichoose-src" "vcflib/multichoose")
                  (unpack "smithwaterman-src" "vcflib/smithwaterman")
                  (unpack "tabixpp-src" "vcflib/tabixpp")
                  (unpack "test-simple-bash-src" "test/test-simple-bash")
                  (unpack "bash-tap-src" "test/bash-tap")))))
           (add-after 'unpack-submodule-sources 'fix-makefiles
             (lambda _
               ;; We don't have the .git folder to get the version tag from.
               (substitute* "vcflib/Makefile"
                 (("^GIT_VERSION.*")
                  ;; The pattern also consumes the line terminator, so put
                  ;; it back to keep the following line separate.
                  (string-append "GIT_VERSION = v" ,version "\n")))
               (substitute* "src/Makefile"
                 (("-I\\$\\(BAMTOOLS_ROOT\\)/src")
                  "-I$(BAMTOOLS_ROOT)/include/bamtools"))
               #t))
           (add-before 'build 'build-tabixpp-and-vcflib
             (lambda* (#:key inputs make-flags #:allow-other-keys)
               (with-directory-excursion "vcflib"
                 ;; tabixpp first, linked against the htslib input ...
                 (with-directory-excursion "tabixpp"
                   (apply invoke "make"
                          (string-append "HTS_LIB="
                                         (assoc-ref inputs "htslib")
                                         "/lib/libhts.a")
                          make-flags))
                 ;; ... then vcflib itself.
                 (apply invoke "make"
                        (string-append "CFLAGS=-Itabixpp")
                        "all"
                        make-flags))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "bin/freebayes" bin)
                 (install-file "bin/bamleftalign" bin))
               #t)))))
      (home-page "https://github.com/ekg/freebayes")
      (synopsis "Haplotype-based variant detector")
      (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
      (license license:expat))))
15502
;; Package definition for samblaster, fetched from the upstream Git
;; repository and built with the GNU build system minus its configure step.
(define-public samblaster
  (package
    (name "samblaster")
    (version "0.1.24")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/GregoryFaust/samblaster.git")
             ;; The commit is referenced as "v." followed by the version.
             (commit (string-append "v." version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; There is nothing to configure.
         (delete 'configure)
         ;; Install by hand: copy the single "samblaster" executable into
         ;; the "bin" directory of the "out" output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "samblaster" bin)
               #t))))))
    (home-page "https://github.com/GregoryFaust/samblaster")
    (synopsis "Mark duplicates in paired-end SAM files")
    (description
     "Samblaster is a fast and flexible program for marking
duplicates in read-id grouped paired-end SAM files. It can also optionally
output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file. When marking
duplicates, samblaster will require approximately 20MB of memory per 1M read
pairs.")
    (license license:expat)))
15536
;; Package definition for velocyto.R, pinned to a specific upstream commit;
;; the version string is derived from "0.6", the revision and the commit.
(define-public r-velocyto
  (let ((revision "1")
        (commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350"))
    (package
      (name "r-velocyto")
      (version (git-version "0.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri
          (git-reference
           (url "https://github.com/velocyto-team/velocyto.R.git")
           (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
      (build-system r-build-system)
      (inputs
       `(("boost" ,boost)))
      (propagated-inputs
       `(("r-hdf5r" ,r-hdf5r)
         ("r-mass" ,r-mass)
         ("r-mgcv" ,r-mgcv)
         ("r-pcamethods" ,r-pcamethods)
         ("r-rcpp" ,r-rcpp)
         ("r-rcpparmadillo" ,r-rcpparmadillo)
         ;; Everything from here on is a suggested package.
         ("r-rtsne" ,r-rtsne)
         ("r-cluster" ,r-cluster)
         ("r-abind" ,r-abind)
         ("r-h5" ,r-h5)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-rsamtools" ,r-rsamtools)
         ("r-edger" ,r-edger)
         ("r-igraph" ,r-igraph)))
      (home-page "https://velocyto.org")
      (synopsis "RNA velocity estimation in R")
      (description
       "This package provides basic routines for estimation of gene-specific
transcriptional derivatives and visualization of the resulting velocity
patterns.")
      (license license:gpl3))))
15580
;; Package definition for MethylDackel, fetched from the upstream Git
;; repository at the tag equal to the version string.
(define-public methyldackel
  (package
    (name "methyldackel")
    (version "0.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/MethylDackel.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "10gh8k0ca92kywnrw5pkacq3g6r8s976s12k8jhp8g3g49q9a97g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       ;; "prefix" is pointed directly at the "bin/" directory of the
       ;; output rather than at the output root.
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               (string-append "prefix=" out "/bin/")))
       #:phases
       (modify-phases %standard-phases
         ;; Instead of running a configure script, patch the Makefile so
         ;; that "install -d $(prefix);" is run right before the existing
         ;; "install MethylDackel $(prefix)" command, creating the target
         ;; directory first.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("install MethylDackel \\$\\(prefix\\)" match)
                (string-append "install -d $(prefix); " match)))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    ;; Python is only required to run the tests.
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/dpryan79/MethylDackel")
    (synopsis "Universal methylation extractor for BS-seq experiments")
    (description
     "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
file containing some form of BS-seq alignments and extract per-base
methylation metrics from them. MethylDackel requires an indexed fasta file
containing the reference genome as well.")
    ;; For the license, see
    ;; https://github.com/dpryan79/MethylDackel/issues/85
    (license license:expat)))
15624
(define-public python-gffutils
  ;; The latest commit is more than a year newer than the latest release,
  ;; so package a Git snapshot instead.
  (let ((revision "1")
        (commit "4034c54600813b1402945e12faa91b3a53162cf1"))
    (package
      (name "python-gffutils")
      (version (git-version "0.9" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/daler/gffutils.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1rwafjdnbir5wnk0ap06ww4lra3p5frhy7mfs03rlldgfnwxymsn"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Right after unpacking, make every file whose name matches
           ;; "\\.gz" writable.
           (add-after 'unpack 'make-gz-files-writable
             (lambda _
               (let ((gz-files (find-files "." "\\.gz")))
                 (for-each make-file-writable gz-files))
               #t))
           ;; Run the test suite through nose, leaving out the tests that
           ;; carry the "slow" attribute.
           (replace 'check
             (lambda _
               ;; The tests need to access the HOME directory.
               (setenv "HOME" "/tmp")
               (invoke "nosetests" "-a" "!slow"))))))
      (propagated-inputs
       `(("python-argcomplete" ,python-argcomplete)
         ("python-argh" ,python-argh)
         ("python-biopython" ,python-biopython)
         ("python-pybedtools" ,python-pybedtools)
         ("python-pyfaidx" ,python-pyfaidx)
         ("python-simplejson" ,python-simplejson)
         ("python-six" ,python-six)))
      (native-inputs
       `(("python-nose" ,python-nose)))
      (home-page "https://github.com/daler/gffutils")
      (synopsis "Tool for manipulation of GFF and GTF files")
      (description
       "python-gffutils is a Python package for working with and manipulating
the GFF and GTF format files typically used for genomic annotations. The
files are loaded into a SQLite database, allowing much more complex
manipulation of hierarchical features (e.g., genes, transcripts, and exons)
than is possible with plain-text methods alone.")
      (license license:expat))))
15675
;; Package definition for libSBML, built from the "core" source tarball
;; with CMake.
(define-public libsbml
  (package
    (name "libsbml")
    (version "5.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/sbml/libsbml/"
                           version "/stable/libSBML-"
                           version "-core-src.tar.gz"))
       (sha256
        (base32 "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
    (build-system cmake-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags
       ;; Pass the library file and the header directory of the "libxml2"
       ;; input to CMake explicitly.
       (let ((libxml2 (assoc-ref %build-inputs "libxml2")))
         (list "-DWITH_CHECK=ON"
               (string-append "-DLIBXML_LIBRARY="
                              libxml2 "/lib/libxml2.so")
               (string-append "-DLIBXML_INCLUDE_DIR="
                              libxml2 "/include/libxml2")))))
    (propagated-inputs
     `(("libxml2" ,libxml2)))
    (native-inputs
     `(("check" ,check)
       ("swig" ,swig)))
    (home-page "http://sbml.org/Software/libSBML")
    (synopsis "Process SBML files and data streams")
    (description
     "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
Markup Language} (SBML) is an interchange format for computer models of
biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
    (license license:lgpl2.1+)))