ff6996011e15f18b5bfdddcb5d4ab1ff00dde083
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019, 2020, 2021 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019, 2020 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
19 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
20 ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
21 ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com>
22 ;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
23 ;;;
24 ;;; This file is part of GNU Guix.
25 ;;;
26 ;;; GNU Guix is free software; you can redistribute it and/or modify it
27 ;;; under the terms of the GNU General Public License as published by
28 ;;; the Free Software Foundation; either version 3 of the License, or (at
29 ;;; your option) any later version.
30 ;;;
31 ;;; GNU Guix is distributed in the hope that it will be useful, but
32 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
33 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 ;;; GNU General Public License for more details.
35 ;;;
36 ;;; You should have received a copy of the GNU General Public License
37 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
38
39 (define-module (gnu packages bioinformatics)
40 #:use-module ((guix licenses) #:prefix license:)
41 #:use-module (guix packages)
42 #:use-module (guix utils)
43 #:use-module (guix download)
44 #:use-module (guix git-download)
45 #:use-module (guix hg-download)
46 #:use-module (guix build-system ant)
47 #:use-module (guix build-system gnu)
48 #:use-module (guix build-system cmake)
49 #:use-module (guix build-system go)
50 #:use-module (guix build-system haskell)
51 #:use-module (guix build-system meson)
52 #:use-module (guix build-system ocaml)
53 #:use-module (guix build-system perl)
54 #:use-module (guix build-system python)
55 #:use-module (guix build-system qt)
56 #:use-module (guix build-system r)
57 #:use-module (guix build-system ruby)
58 #:use-module (guix build-system scons)
59 #:use-module (guix build-system trivial)
60 #:use-module (guix deprecation)
61 #:use-module (gnu packages)
62 #:use-module (gnu packages assembly)
63 #:use-module (gnu packages autotools)
64 #:use-module (gnu packages algebra)
65 #:use-module (gnu packages base)
66 #:use-module (gnu packages bash)
67 #:use-module (gnu packages bison)
68 #:use-module (gnu packages bioconductor)
69 #:use-module (gnu packages boost)
70 #:use-module (gnu packages check)
71 #:use-module (gnu packages code)
72 #:use-module (gnu packages commencement)
73 #:use-module (gnu packages cmake)
74 #:use-module (gnu packages compression)
75 #:use-module (gnu packages cpio)
76 #:use-module (gnu packages cran)
77 #:use-module (gnu packages curl)
78 #:use-module (gnu packages documentation)
79 #:use-module (gnu packages databases)
80 #:use-module (gnu packages datastructures)
81 #:use-module (gnu packages dlang)
82 #:use-module (gnu packages file)
83 #:use-module (gnu packages flex)
84 #:use-module (gnu packages gawk)
85 #:use-module (gnu packages gcc)
86 #:use-module (gnu packages gd)
87 #:use-module (gnu packages golang)
88 #:use-module (gnu packages glib)
89 #:use-module (gnu packages graph)
90 #:use-module (gnu packages graphics)
91 #:use-module (gnu packages graphviz)
92 #:use-module (gnu packages groff)
93 #:use-module (gnu packages gtk)
94 #:use-module (gnu packages guile)
95 #:use-module (gnu packages guile-xyz)
96 #:use-module (gnu packages haskell-check)
97 #:use-module (gnu packages haskell-web)
98 #:use-module (gnu packages haskell-xyz)
99 #:use-module (gnu packages image)
100 #:use-module (gnu packages image-processing)
101 #:use-module (gnu packages imagemagick)
102 #:use-module (gnu packages java)
103 #:use-module (gnu packages java-compression)
104 #:use-module (gnu packages jemalloc)
105 #:use-module (gnu packages linux)
106 #:use-module (gnu packages lisp-xyz)
107 #:use-module (gnu packages logging)
108 #:use-module (gnu packages machine-learning)
109 #:use-module (gnu packages man)
110 #:use-module (gnu packages maths)
111 #:use-module (gnu packages mpi)
112 #:use-module (gnu packages ncurses)
113 #:use-module (gnu packages node)
114 #:use-module (gnu packages ocaml)
115 #:use-module (gnu packages pcre)
116 #:use-module (gnu packages parallel)
117 #:use-module (gnu packages pdf)
118 #:use-module (gnu packages perl)
119 #:use-module (gnu packages perl-check)
120 #:use-module (gnu packages pkg-config)
121 #:use-module (gnu packages popt)
122 #:use-module (gnu packages protobuf)
123 #:use-module (gnu packages python)
124 #:use-module (gnu packages python-check)
125 #:use-module (gnu packages python-compression)
126 #:use-module (gnu packages python-science)
127 #:use-module (gnu packages python-web)
128 #:use-module (gnu packages python-xyz)
129 #:use-module (gnu packages qt)
130 #:use-module (gnu packages rdf)
131 #:use-module (gnu packages readline)
132 #:use-module (gnu packages ruby)
133 #:use-module (gnu packages serialization)
134 #:use-module (gnu packages shells)
135 #:use-module (gnu packages sphinx)
136 #:use-module (gnu packages statistics)
137 #:use-module (gnu packages swig)
138 #:use-module (gnu packages tbb)
139 #:use-module (gnu packages tex)
140 #:use-module (gnu packages texinfo)
141 #:use-module (gnu packages textutils)
142 #:use-module (gnu packages time)
143 #:use-module (gnu packages tls)
144 #:use-module (gnu packages vim)
145 #:use-module (gnu packages web)
146 #:use-module (gnu packages xml)
147 #:use-module (gnu packages xorg)
148 #:use-module (srfi srfi-1)
149 #:use-module (srfi srfi-26)
150 #:use-module (ice-9 match))
151
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is skipped; the 'build' phase below compiles
         ;; the single versioned C file directly.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; Ask Guix for the C compiler of the target system instead of
             ;; hard-coding "gcc", so that the package can be cross-compiled.
             (invoke ,(cc-for-target)
                     "-O3"
                     "-ffast-math"
                     "-finline-functions"
                     "-o"
                     "aragorn"
                     (string-append "aragorn" ,version ".c"))
             #t))
         (replace 'install
           ;; Install the executable and its man page by hand.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (install-file "aragorn" bin)
               (install-file "aragorn.1" man))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
196
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts. Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           ;; Record the build-time PATH in the 'bamm' wrapper.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; Let the build system work out the site-packages directory of
             ;; the freshly installed output.  Slicing a fixed number of
             ;; characters off the Python store file name only yields the
             ;; right "major.minor" string for single-digit patch levels.
             (add-installed-pythonpath inputs outputs)
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (invoke "nosetests")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3) ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
291
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Pass an rpath for OUT/lib/bamtools to the linker through
             ;; the LDFLAGS environment variable.
             (let* ((out    (assoc-ref outputs "out"))
                    (libdir (string-append out "/lib/bamtools")))
               (setenv "LDFLAGS" (string-append "-Wl,-rpath=" libdir)))
             #t)))))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
325
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "0r508mp15pqzf8r1269kb4v5naw9zsvbwd3cz8s1yj7carsf9viw"))
       (modules '((guix build utils)))
       ;; Remove the htslib copy that ships inside the release tarball.
       (snippet
        '(begin
           (delete-file-recursively "htslib-1.11")
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (arguments
     `(#:test-target "test"
       #:configure-flags '("--enable-libgsl")
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; The test driver refers to /bin/bash; substitute the bash
             ;; found in PATH.
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
369
;; Variant of bcftools pinned to release 1.9 and built with htslib-1.9;
;; every field not listed here comes from 'bcftools'.
(define-public bcftools-1.9
  (package
    (inherit bcftools)
    (name "bcftools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
       (modules '((guix build utils)))
       ;; Remove the htslib copy that ships inside the release tarball.
       (snippet
        '(begin
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("htslib" ,htslib-1.9)
       ("perl" ,perl)))))
391
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops bundles tarballs of minimally patched copies of
             ;; jansson, zlib, and bzip2.  Stock libraries cannot be used
             ;; because at least zlib is patched to add a C++ function
             ;; definition (deflateInit2cpp).  Until upstream offers a way to
             ;; link against system libraries, the three in-tree copies have
             ;; to be built.
             ;;
             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Extract the tarballs now so that their contents benefit from
             ;; shebang patching.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         '("jansson-2.6.tar.bz2"
                           "zlib-1.2.7.tar.bz2"
                           "bzip2-1.0.6.tar.bz2")))
             ;; Keep the Makefile from unpacking the tarballs again, and have
             ;; it run the configure scripts with CONFIG_SHELL=bash.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
451
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.29.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-wrapper)))
    (inputs
     `(("samtools" ,samtools-1.9)
       ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       ;; The configure phase is removed, so the installation prefix is
       ;; handed to make directly.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
487
;; Ribotaper expects fewer columns than the files produced by later releases
;; of bedtools, hence this older variant.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Copy everything found under the build tree's "bin" directory
           ;; into OUT/bin/.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out         (assoc-ref outputs "out"))
                    (destination (string-append out "/bin/"))
                    (programs    (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program destination))
                         programs))
             #t)))))))
514
(define-public pbbam
  (package
    (name "pbbam")
    (version "0.23.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: tests/pbbam_test cannot be linked
       ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
       ;; undefined reference to symbol '_ZTIN7testing4TestE'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;; error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; gtest_main has no pkg-config file, so the stock dependency
             ;; lookup fails.  Look the library up through the C++ compiler
             ;; object instead.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "tests/meson.build"
                 (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper))) ; for tests
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
570
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr_libcpp")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;; undefined reference to symbol
       ;; '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;; error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend explicit hdf5 and hdf5_cpp library lookups to the
             ;; dependency list in meson.build.
             (let* ((hdf5 (assoc-ref inputs "hdf5"))
                    (extra-deps
                     (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                             hdf5 hdf5)))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" m)
                  (string-append m extra-deps))))
             #t))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; gtest_main has no pkg-config file, so the stock dependency
             ;; lookup fails.  Look the library up through the C++ compiler
             ;; object instead.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "unittest/meson.build"
                 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
632
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
    (build-system meson-build-system)
    (arguments
     `(;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend explicit hdf5 and hdf5_cpp library lookups to the
             ;; dependency list in meson.build.
             (let* ((hdf5 (assoc-ref inputs "hdf5"))
                    (extra-deps
                     (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                             hdf5 hdf5)))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" m)
                  (string-append m extra-deps))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("blasr-libcpp" ,blasr-libcpp)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
677
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Wrap each installed script so that it runs with R_LIBS_SITE
             ;; set to the value seen at build time.
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                   (r-libs (getenv "R_LIBS_SITE")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append bindir script)
                    (list "R_LIBS_SITE" ":" '= (list r-libs))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             #t)))))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
722
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ratschlab/RiboDiff")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Add a "scripts=" entry right below the "packages=" line of
             ;; setup.py, reusing that line's leading text, so that
             ;; scripts/TE.py is installed as an executable.
             (substitute* "setup.py"
               (("^(.*)packages=.*" packages-line leading)
                (string-append packages-line "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
767
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install the binary, and install the "awk.1" man page under
             ;; the name "bioawk.1".
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (mandir (string-append out "/share/man/man1"))
                    (page   (string-append mandir "/bioawk.1")))
               (mkdir-p mandir)
               (copy-file "awk.1" page)
               (install-file "bioawk" bindir))
             #t)))))
    (native-inputs
     `(("bison" ,bison)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
811
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pybedtools" version))
       (sha256
        (base32 "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-1)
                  (srfi srfi-26)
                  (guix build utils)
                  (guix build python-build-system))
       ;; See https://github.com/daler/pybedtools/issues/192
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Tests are disabled by renaming them so that they no longer
             ;; start with "test".
             (substitute* "pybedtools/test/test_scripts.py"
               ;; This test freezes.
               (("def test_intron_exon_reads")
                "def _do_not_test_intron_exon_reads")
               ;; This test fails in the Python 2 build.
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))
             #t))
         ;; TODO: Remove this phase once it is part of PYTHON-BUILD-SYSTEM.
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; A C/C++ file counts as generated when dropping its extension
             ;; gives the same name as some .pyx file without its extension.
             (let* ((pyx-stems (map (cut string-drop-right <> 4)
                                    (find-files "." "\\.pyx$")))
                    (candidates (find-files "." "\\.(c|cpp|cxx)$"))
                    (stem (lambda (file)
                            (string-take file
                                         (string-index-right file #\.))))
                    (generated? (lambda (file)
                                  (member (stem file) pyx-stems))))
               (for-each delete-file (filter generated? candidates))
               #t)))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         (replace 'check
           (lambda _
             ;; Locate the "lib*" and "scripts*" sub-directories of ./build/
             ;; and expose them through PYTHONPATH and PATH respectively.
             (let* ((build-root (string-append (getcwd) "/build/"))
                    (entries (scandir build-root))
                    (subdir (lambda (prefix)
                              (string-append
                               build-root
                               (find (cut string-prefix? prefix <>)
                                     entries))))
                    (build (subdir "lib"))
                    (scripts (subdir "scripts")))
               (setenv "PYTHONPATH"
                       (string-append build ":" (getenv "PYTHONPATH")))
               ;; Executable scripts such as 'intron_exon_reads.py' must be
               ;; available in the PATH.
               (setenv "PATH"
                       (string-append scripts ":" (getenv "PATH"))))
             ;; The tests need to be run from elsewhere...
             (mkdir-p "/tmp/test")
             (copy-recursively "pybedtools/test" "/tmp/test")
             (with-directory-excursion "/tmp/test"
               (invoke "pytest" "-v" "--doctest-modules")))))))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("kentutils" ,kentutils) ; for bedGraphToBigWig
       ("python-six" ,python-six)
       ;; For the test suite.
       ("python-pytest" ,python-pytest)
       ("python-psutil" ,python-psutil)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from with
Python.")
    (license license:gpl2+)))
919
;; Python 2 conversion of python-pybedtools; python2-pathlib is added in
;; front of the native inputs of the converted package.
(define-public python2-pybedtools
  (let ((base (package-with-python2 python-pybedtools)))
    (package
      (inherit base)
      (native-inputs
       ;; Kept inside the field so the inherited inputs are still looked up
       ;; only when the field is accessed.
       (cons (list "python2-pathlib" python2-pathlib)
             (package-native-inputs base))))))
927
;; BIOM format utilities, built from the GitHub checkout with the C files
;; regenerated by Cython.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
       (modules '((guix build utils)))
       (snippet '(begin
                   ;; Delete generated C files.  The pattern is anchored so
                   ;; that only file names ending in ".c" are removed; an
                   ;; unanchored pattern would also match names that merely
                   ;; contain ".c", such as "setup.cfg".
                   (for-each delete-file (find-files "." "\\.c$"))
                   #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Export USE_CYTHON=1 for the remaining phases.
         (add-after 'unpack 'use-cython
           (lambda _ (setenv "USE_CYTHON" "1") #t))
         ;; Put a "skipif(True)" decorator, at the same indentation, in front
         ;; of the two test definitions named below.
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "biom/tests/test_cli/test_validate_table.py"
               (("^(.+)def test_invalid_hdf5" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             (substitute* "biom/tests/test_table.py"
               (("^(.+)def test_from_hdf5_issue_731" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             #t))
         ;; Give the installed gzip files mode 644 before the
         ;; 'reset-gzip-timestamps phase runs.  Only names ending in ".gz"
         ;; are touched.
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (file) (chmod file #o644))
                         (find-files out "\\.gz$"))
               #t))))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ;; FIXME: Upgrade to pandas 1.0 when
       ;; https://github.com/biocore/biom-format/issues/837 is resolved.
       ("python-pandas" ,python-pandas-0.25)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    (properties `((python2-variant . ,(delay python2-biom-format))))))
998
;; Python 2 conversion of python-biom-format with one extra phase that
;; rewrites setup.py.
(define-public python2-biom-format
  (let* ((without-variant (strip-python2-variant python-biom-format))
         (parent (package-with-python2 without-variant)))
    (package
      (inherit parent)
      (arguments
       (substitute-keyword-arguments (package-arguments parent)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             ;; Do not require the unmaintained pyqi library: the statement
             ;; that appends it to install_requires becomes "pass".
             (add-after 'unpack 'remove-pyqi
               (lambda _
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)")
                    "pass"))
                 #t)))))))))
1013
;; Hi-C pair processing tools, fetched from the upstream "v"-prefixed git tag.
(define-public python-pairtools
  (package
    (name "python-pairtools")
    (version "0.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mirnylab/pairtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the literal "/bin/bash" in the merge and sort modules
         ;; with the bash that 'which' finds.
         (add-after 'unpack 'fix-references
           (lambda _
             (let ((bash (which "bash")))
               (substitute* (list "pairtools/pairtools_merge.py"
                                  "pairtools/pairtools_sort.py")
                 (("/bin/bash") bash)))
             #t))
         ;; Run pytest from /tmp after adding the installed package to the
         ;; Python path.
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (add-installed-pythonpath inputs outputs)
             (with-directory-excursion "/tmp"
               (invoke "pytest" "-v")))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-pytest" ,python-pytest)))
    (inputs
     `(("python" ,python-wrapper)))
    (propagated-inputs
     `(("htslib" ,htslib)               ; for bgzip, looked up in PATH
       ("samtools" ,samtools)           ; looked up in PATH
       ("lz4" ,lz4)                     ; for lz4c
       ("python-click" ,python-click)
       ("python-numpy" ,python-numpy)))
    (home-page "https://github.com/mirnylab/pairtools")
    (synopsis "Process mapped Hi-C data")
    (description "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment. Process pair-end sequence
alignments and perform the following operations:

@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
    (license license:expat)))
1071
;; BioPerl with a small, fixed set of Perl inputs.  The installed ".pl"
;; programs are wrapped so that PERL5LIB is set for them.
(define-public bioperl-minimal
  (let* ((direct-inputs `(("perl-module-build" ,perl-module-build)
                          ("perl-data-stag" ,perl-data-stag)
                          ("perl-libwww" ,perl-libwww)
                          ("perl-uri" ,perl-uri)))
         ;; Package names of the transitive target inputs of every direct
         ;; input; duplicate entries are dropped before the names are taken.
         (transitive-inputs
          (map (lambda (entry)
                 (package-name (cadr entry)))
               (delete-duplicates
                (append-map (lambda (input)
                              (package-transitive-target-inputs (cadr input)))
                            direct-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The commit is "release-" followed by the version with each
               ;; dot turned into a dash.
               (commit (string-append
                        "release-"
                        (string-map (lambda (ch)
                                      (if (char=? ch #\.) #\- ch))
                                    version)))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (script-dir (string-append out "/bin/"))
                      (own-modules (string-append out "/lib/perl5/site_perl"))
                      (input-dirs (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',transitive-inputs))
                      (perl5lib (string-join (cons own-modules input-dirs)
                                             ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               (list "PERL5LIB" ":" 'prefix
                                     (list perl5lib))))
                           (find-files script-dir "\\.pl$"))
                 #t))))))
      (inputs direct-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
1137
;; Biopython, fetched from PyPI; HOME is set to /tmp before the test phase.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license
     (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
1168
;; Python 2 conversion of python-biopython.
(define-public python2-biopython (package-with-python2 python-biopython))
1171
;; FASTA/FASTQ parser from PyPI; built without running tests.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "fastalite" version))
       (sha256
        (base32 "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    ;; Test data is not distributed.
    (arguments
     '(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
1191
;; Python 2 conversion of python-fastalite.
(define-public python2-fastalite (package-with-python2 python-fastalite))
1194
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
        (revision "1"))
    (package
      (name "bpp-core")
      ;; Same string as before: "2.2.0-1." plus the first seven characters
      ;; of COMMIT, now built with the shared helper.
      (version (git-version "2.2.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-core")
                      (commit commit)))
                ;; Yields NAME-VERSION-checkout, as the hand-written
                ;; string-append did.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
1224
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
        (revision "1"))
    (package
      (name "bpp-phyl")
      ;; Same string as before: "2.2.0-1." plus the first seven characters
      ;; of COMMIT, now built with the shared helper.
      (version (git-version "2.2.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
                      (commit commit)))
                ;; Yields NAME-VERSION-checkout, as the hand-written
                ;; string-append did.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
1258
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
        (revision "1"))
    (package
      (name "bpp-popgen")
      ;; Same string as before: "2.2.0-1." plus the first seven characters
      ;; of COMMIT, now built with the shared helper.
      (version (git-version "2.2.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
                      (commit commit)))
                ;; Yields NAME-VERSION-checkout, as the hand-written
                ;; string-append did.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
1290
(define-public bpp-seq
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
        (revision "1"))
    (package
      (name "bpp-seq")
      ;; Same string as before: "2.2.0-1." plus the first seven characters
      ;; of COMMIT, now built with the shared helper.
      (version (git-version "2.2.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bpp-seq")
                      (commit commit)))
                ;; Yields NAME-VERSION-checkout, as the hand-written
                ;; string-append did.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
1323
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b")
        (revision "1"))
    (package
      (name "bppsuite")
      ;; Same string as before: "2.2.0-1." plus the first seven characters
      ;; of COMMIT, now built with the shared helper.
      (version (git-version "2.2.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                ;; Yields NAME-VERSION-checkout, as the hand-written
                ;; string-append did.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Each input carries its own label; the bpp-popgen entry was
       ;; previously labelled "bpp-phyl" as well, duplicating that key.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1361
;; NCBI BLAST+ built from the upstream source tarball with the bundled
;; bzip2, zlib and pcre removed; installs into "out", "lib" and "include".
(define-public blast+
  (package
    (name "blast+")
    (version "2.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32 "11kvrrl0mcwww6530r55hccpg3x3msmhr3051fwnjbq8rzg2j1qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2, zlib and pcre.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are two(!) tests for this massive library, and both fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --   [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --   [serial/datatool] datatool_xml.sh     (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; $HOME needs to be set at some point during the configure phase
         (add-before 'configure 'set-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         ;; Continue the build from the "c++" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the command TOOL, or #f
             ;; (after printing a warning) when 'which' cannot find it.
             (define (resolve-tool tool)
               (if (string=? tool "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which tool)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 tool)
                         #f))))

             ;; Rewrite hardcoded paths to various tools; a match is left
             ;; untouched when the tool cannot be resolved.
             (substitute*
                 (append '("src/build-system/configure.ac"
                           "src/build-system/configure"
                           "src/build-system/helpers/run_with_lock.c"
                           "scripts/common/impl/if_diff.sh"
                           "scripts/common/impl/run_with_lock.sh"
                           "src/build-system/Makefile.configurables.real"
                           "src/build-system/Makefile.in.top"
                           "src/build-system/Makefile.meta.gmake=no"
                           "src/build-system/Makefile.meta.in"
                           "src/build-system/Makefile.meta_l"
                           "src/build-system/Makefile.meta_p"
                           "src/build-system/Makefile.meta_r"
                           "src/build-system/Makefile.mk.in"
                           "src/build-system/Makefile.requirements"
                           "src/build-system/Makefile.rules_with_autodep.in")
                         (find-files "scripts/common/check" "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole directory tool)
                (or (resolve-tool tool) whole)))

             ;; Use the 'ln' found by 'which' and drop PATH assignments in
             ;; the config* files.
             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (libdir (string-append (assoc-ref outputs "lib") "/lib"))
                    (includedir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++"))
                    (build-root (string-append (getcwd) "/build")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" build-root)
                       (string-append "--prefix=" prefix)
                       (string-append "--libdir=" libdir)
                       (string-append "--includedir=" includedir)
                       (string-append "--with-bz2="
                                      (assoc-ref inputs "bzip2"))
                       (string-append "--with-z="
                                      (assoc-ref inputs "zlib"))
                       (string-append "--with-pcre="
                                      (assoc-ref inputs "pcre"))
                       ;; Each library is built twice by default, once
                       ;; with "-static" in its name, and again
                       ;; without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ;  21 MB
               "lib"                    ; 226 MB
               "include"))              ;  33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "https://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1512
;; BLESS read error-correction tool.  Bundled libraries are removed from the
;; source, there is no configure step, and the "bless" and "kmc" binaries
;; are installed by hand.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; ZLIB points at the static libz.a from the "zlib:static" input.
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz directory is deleted by the source snippet, so
         ;; the Makefile must not try to build it.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code the kmc binary installed by this package and the pigz
         ;; binary from the "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Copy the two built programs into OUT/bin.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1597
;; Bowtie 2 read aligner, built straight from its Makefile (no configure
;; step) and checked with the upstream Perl test script.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; replace BUILD_HOST and BUILD_TIME for deterministic build
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "allall"
               "WITH_TBB=1"
               (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the upstream test script against the freshly built
         ;; binaries in the current directory.
         (replace 'check
           (lambda _
             (invoke "perl"
                     "scripts/test/simple_tests.pl"
                     "--bowtie2=./bowtie2"
                     "--bowtie2-build=./bowtie2-build")
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1656
;; Bowtie 1 short-read aligner, built straight from its Makefile; there is
;; no configure step and no "check" target.
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                                  version "/bowtie-" version "-src.zip"))
              (sha256
               (base32
                "11dbihdnrizc6qhx9xsw77w3q5ssx642alaqzvhxx32ak9glvq04"))
              (modules '((guix build utils)))
              (snippet
               ;; Wrapped in (begin ... #t) so the snippet has an explicit
               ;; true result, like the other snippets in this file.
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "CC=gcc" "all"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("python-wrapper" ,python-wrapper)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
1697
;; TopHat spliced read mapper.  The bundled SeqAn and samtools are removed
;; and the sources are pointed at the samtools and seqan inputs instead.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32 "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (for-each delete-file-recursively
                     '("src/SeqAn-1.4.2"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'set-paths 'hide-default-gcc
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Remove the default GCC from CPLUS_INCLUDE_PATH to prevent
             ;; conflicts with the GCC 5 input.
             (let* ((default-c++ (string-append (assoc-ref inputs "gcc")
                                                "/include/c++"))
                    (entries (string-split (getenv "CPLUS_INCLUDE_PATH")
                                           #\:))
                    (kept (delete default-c++ entries)))
               (setenv "CPLUS_INCLUDE_PATH" (string-join kept ":"))
               #t)))
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Strip every mention of the bundled samtools 0.1.18 from the
             ;; Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ lhs) lhs)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Call the samtools found by 'which' instead of the bundled
             ;; program name.
             (substitute* (list "src/common.cpp"
                                "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include bam.h and sam.h from the "samtools/" header
             ;; directory, for both the quoted and the bracketed form.
             (substitute* (list "src/common.h"
                                "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* (list "src/bwt_map.h"
                                "src/map2gtf.h"
                                "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (native-inputs
     `(("gcc@5" ,gcc-5)))               ;; doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1776
;; BWA has neither a "configure" script nor an "install" target, so the
;; build products are copied into the output by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/bwa/releases/download/v"
                    version "/bwa-" version ".tar.bz2"))
              (sha256
               (base32
                "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Each entry pairs a file from the build tree with its
               ;; destination directory below the output.
               (for-each (lambda (file+directory)
                           (install-file (car file+directory)
                                         (string-append out
                                                        (cdr file+directory))))
                         '(("bwa" . "/bin")
                           ("libbwa.a" . "/lib")
                           ("README.md" . "/share/doc/bwa")
                           ("bwa.1" . "/share/man/man1"))))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1825
;; BWA-PSSM inherits every field it does not override (such as the build
;; arguments) from the bwa package.
(define-public bwa-pssm
  (package
    (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1855
;; bwa-meth is a Python script that shells out to "bwa"; the invocations are
;; rewritten to use the file name of the bwa executable found at build time.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/brentp/bwa-meth")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           (lambda _
             (substitute* "bwameth.py"
               ;; Only the "mem" and "index" sub-commands are matched.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") ""))
             #t)))))
    (inputs
     `(("bwa" ,bwa)))
    (native-inputs
     `(("python-toolshed" ,python-toolshed)))
    (home-page "https://github.com/brentp/bwa-meth")
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common). It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads. It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read. It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich region.")
    (license license:expat)))
1896
;; bx-python, fetched from PyPI.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32 "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
    (build-system python-build-system)
    ;; The test data are not part of the archive, so the tests cannot pass.
    (arguments (list #:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("python-lzo" ,python-lzo)
       ("python-nose" ,python-nose)
       ("python-cython" ,python-cython)))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1925
;; Python 2 variant of python-bx-python, derived with package-with-python2.
(define-public python2-bx-python
  (package-with-python2 python-bx-python))
1928
;; Pysam, built against the htslib package instead of the bundled copy that
;; the source snippet deletes.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.15.1")
    (source (origin
              (method git-fetch)
              ;; Test data is missing on PyPi.
              (uri (git-reference
                    (url "https://github.com/pysam-developers/pysam")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Drop bundled htslib. TODO: Also remove samtools
                          ;; and bcftools.
                          (delete-file-recursively "htslib")
                          #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the build at the htslib input.
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR"
                       (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR"
                       (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1")
               #t)))
         (replace 'check
           ;; Honor #:tests? so that the test suite is skipped whenever
           ;; tests are disabled for this package.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; This file contains tests that require a connection to the
               ;; internet.
               (delete-file "tests/tabix_test.py")
               ;; FIXME: This test fails
               (delete-file "tests/AlignmentFile_test.py")
               ;; Add first subdirectory of "build" directory to PYTHONPATH.
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (getcwd) "/build/"
                        (car (scandir "build"
                                      (negate (cut string-prefix? "." <>))))))
               ;; Step out of source dir so python does not import from CWD.
               (with-directory-excursion "tests"
                 (setenv "HOME" "/tmp")
                 (invoke "make" "-C" "pysam_data")
                 (invoke "make" "-C" "cbcf_data")
                 ;; Running nosetests without explicitly asking for a single
                 ;; process leads to a crash.  Running with multiple processes
                 ;; fails because the tests are not designed to run in
                 ;; parallel.

                 ;; FIXME: tests keep timing out on some systems.
                 (invoke "nosetests" "-v" "--processes" "1")))
             #t)))))
    (propagated-inputs
     `(("htslib" ,htslib-1.9)))    ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; Dependencies below are for tests only.
       ("samtools" ,samtools-1.9)
       ("bcftools" ,bcftools-1.9)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
2011
;; Python 2 variant of python-pysam, derived with package-with-python2.
(define-public python2-pysam
  (package-with-python2 python-pysam))
2014
;; twobitreader, fetched from the upstream Git repository at the tag named
;; after the version.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    ;; There are no tests to run: they are not included.
    (arguments (list #:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
2039
;; Python 2 variant of python-twobitreader, derived with package-with-python2.
(define-public python2-twobitreader
  (package-with-python2 python-twobitreader))
2042
;; plastid, fetched from PyPI.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    ;; Tests are disabled: some of the files they need are not included.
    (arguments '(#:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
2075
;; Python 2 variant of python-plastid, derived with package-with-python2.
(define-public python2-plastid
  (package-with-python2 python-plastid))
2078
;; TEToolkit is built with Python 2.  Its scripts call external programs,
;; whose names are replaced with the file names found at build time, and
;; the installed executables are wrapped so that they find R packages.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mhammell-laboratory/tetoolkit")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; not guaranteed to work with Python 3
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))
             #t))
         (add-after 'unpack 'patch-invocations
           (lambda _
             ;; Replace bare program names with the file names that "which"
             ;; finds on PATH.
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (which "sort") " "))
               (("'rm -f ")
                (string-append "'" (which "rm") " -f "))
               (("'Rscript'") (string-append "'" (which "Rscript") "'")))
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED") (which "bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (which "Rscript") "\"")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (let ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("TEtranscripts"
                  "TEcount")))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("bedtools" ,bedtools)
       ("python-argparse" ,python2-argparse)
       ("python-pysam" ,python2-pysam)
       ("r-minimal" ,r-minimal)
       ("r-deseq2" ,r-deseq2)))
    (home-page "https://github.com/mhammell-laboratory/tetoolkit")
    (synopsis "Transposable elements in differential enrichment analysis")
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
RNA-seq (and similar data) and annotate reads to both genes and transposable
elements. TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))
2144
;; CD-HIT is built from its Makefile directly; the executables end up in the
;; "bin" directory of the output via the PREFIX make flag.
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; Executables are copied directly to the PREFIX.
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
             ;; Support longer sequences (e.g. Pacbio sequences)
             "MAX_SEQ=60000000")
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               #t))))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
2195
;; CLIPper, fetched from the upstream Git repository at the tag named after
;; the version.
(define-public clipper
  (package
    (name "clipper")
    (version "2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/YeoLab/clipper")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bcag4lb5bkzsj2vg7lrq24aw6yfgq275ifrbhd82l7kqgbbjbkv"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The 'reset-gzip-timestamps' phase can only do its work on
             ;; writable files, so make every installed .gz file writable.
             (for-each make-file-writable
                       (find-files (assoc-ref outputs "out") "\\.gz$"))
             #t)))))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python-pybedtools)
       ("python-cython" ,python-cython)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-setuptools-git" ,python-setuptools-git)
       ("python-mock" ,python-mock)     ; for tests
       ("python-nose" ,python-nose)     ; for tests
       ("python-pytz" ,python-pytz)))   ; for tests
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
2241
;; CodingQuarry is installed by hand: the 'configure' phase is deleted and
;; the 'install' phase is replaced with explicit copies into the output.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32 "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "INSTRUCTIONS.pdf"
                             (string-append out "/share/doc/codingquarry"))
               ;; This is the directory that the QUARRY_PATH search path
               ;; specification below refers to.
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (for-each (lambda (program)
                           (install-file program bin))
                         '("CodingQuarry"
                           "CufflinksGTF_to_CodingQuarryGFF3.py")))
             #t)))))
    (inputs `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
2282
;; COUGER has no configure or build step: the "src" directory and the
;; "couger" script are copied to the output, and the script is wrapped so
;; that it runs with the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "src" (string-append out "/src"))
               (mkdir bin)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (install-file "couger" bin))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (wrap-program (string-append (assoc-ref outputs "out")
                                          "/bin/couger")
               `("PYTHONPATH" ":" prefix (,(getenv "PYTHONPATH"))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
2353
;; Clustal-Omega, built with the default gnu-build-system phases.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
2376
;; CrossMap, fetched from PyPI.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.3.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "CrossMap" version))
       (sha256
        (base32 "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
    (build-system python-build-system)
    (inputs
     `(("python-bx-python" ,python-bx-python)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
2404
;; dnaio, fetched from PyPI.
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "dnaio" version))
       (sha256
        (base32
         "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    ;; NOTE(review): xopen is propagated on the understanding that dnaio
    ;; needs it at run time, not just while building -- confirm against
    ;; upstream's install requirements.
    (propagated-inputs
     `(("python-xopen" ,python-xopen)))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
2427
;; deeptoolsintervals, fetched from PyPI.
(define-public python-deeptoolsintervals
  (package
    (name "python-deeptoolsintervals")
    (version "0.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "deeptoolsintervals" version))
       (sha256
        (base32 "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
    (build-system python-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/deeptools/deeptools_intervals")
    (synopsis "Create GTF-based interval trees with associated meta-data")
    (description
     "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data. It is primarily used by the
@code{deeptools} package.")
    (license license:expat)))
2448
;; deepTools, fetched from the upstream Git repository at the tag named
;; after the version.
(define-public python-deeptools
  (package
    (name "python-deeptools")
    (version "3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/deepTools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0l09vyynz6s6w7fnyd94rpys4a6aja6kp4gli64pngdxdz3md1nl"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-deeptoolsintervals" ,python-deeptoolsintervals)
       ("python-plotly" ,python-plotly-2.4.1)))
    (home-page "https://pypi.org/project/deepTools/")
    (synopsis "Useful tools for exploring deep sequencing data")
    (description "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files. Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
    ;; Two licenses apply: deeptools/cm.py is under the BSD license, while
    ;; the rest of the code is under the MIT license.
    (license (list license:bsd-3 license:expat))))
2489
;; Keep the old name 'deeptools' as a deprecated alias of python-deeptools.
(define-deprecated deeptools python-deeptools)
2491
;; Cutadapt, fetched from PyPI.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cutadapt" version))
       (sha256
        (base32 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
    (build-system python-build-system)
    (inputs
     `(("python-dnaio" ,python-dnaio)
       ("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
2516
;; libBigWig has no "configure" phase; the compiler and the installation
;; prefix are passed as make flags instead.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f                      ; tests require access to the web
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for tests
       ("python" ,python-2)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
2553
;; pyBigWig, with the bundled libBigWig sources deleted and "BigWig" added
;; to the libraries listed in setup.py.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             ;; Prepend "BigWig" to the "libs" list in setup.py.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
2591
;; Python 2 variant of python-pybigwig, derived with package-with-python2.
(define-public python2-pybigwig
  (package-with-python2 python-pybigwig))
2594
;; schema-salad, fetched from PyPI.
(define-public python-schema-salad
  (package
    (name "python-schema-salad")
    (version "7.0.20200811075006")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "schema-salad" version))
       (sha256
        (base32 "0wanbwmqb189x1m0vacnhpivfsr8rwbqknngivzxxs8j46yj80bg"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-cachecontrol" ,python-cachecontrol-0.11)
       ("python-lockfile" ,python-lockfile)
       ("python-mistune" ,python-mistune)
       ("python-rdflib" ,python-rdflib)
       ("python-rdflib-jsonld" ,python-rdflib-jsonld)
       ("python-requests" ,python-requests)
       ("python-ruamel.yaml" ,python-ruamel.yaml)
       ("python-typing-extensions" ,python-typing-extensions)))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (home-page "https://github.com/common-workflow-language/schema_salad")
    (synopsis "Schema Annotations for Linked Avro Data (SALAD)")
    (description
     "Salad is a schema language for describing JSON or YAML structured linked
data documents. Salad schema describes rules for preprocessing, structural
validation, and hyperlink checking for documents described by a Salad schema.
Salad supports rich data modeling with inheritance, template specialization,
object identifiers, object references, documentation generation, code
generation, and transformation to RDF. Salad provides a bridge between document
and record oriented data modeling and the Semantic Web.")
    (license license:asl2.0)))
2630
;; Reference implementation of the Common Workflow Language, built from a
;; git checkout of the tag named after the version.
(define-public cwltool
  (package
    (name "cwltool")
    (version "3.0.20201121085451")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/common-workflow-language/cwltool")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1awf99n7aglxc5zszrlrv6jxp355jp45ws7wpsgjlgcdv7advn0w"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'loosen-version-restrictions
           (lambda _
             (substitute* "setup.py"
               (("== 1.5.1") ">=1.5.1")   ; prov
               ((", < 3.5") ""))          ; shellescape
             #t))
         (add-after 'unpack 'dont-use-git
           (lambda _
             ;; Replace the call that derives the build tag from git with
             ;; the literal tag.  The digits after "3.0." in the version are
             ;; already a formatted %Y%m%d%H%M%S timestamp, so they must not
             ;; be passed to time.gmtime as if they were seconds since the
             ;; epoch: that yields a date hundreds of thousands of years in
             ;; the future instead of ".20201121085451".
             (substitute* "gittaggers.py"
               (("self.git_timestamp_tag\\(\\)")
                (string-append "'." (string-drop ,version 4) "'")))
             #t))
         (add-after 'unpack 'modify-tests
           (lambda _
             ;; Tries to connect to the internet.
             (delete-file "tests/test_udocker.py")
             (delete-file "tests/test_http_input.py")
             (substitute* "tests/test_load_tool.py"
               (("def test_load_graph_fragment_from_packed")
                (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
                               "def test_load_graph_fragment_from_packed")))
             (substitute* "tests/test_examples.py"
               ;; Tries to connect to the internet.
               (("def test_env_filtering")
                (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
                               "def test_env_filtering"))
               ;; Tries to use cwl-runners.
               (("def test_v1_0_arg_empty_prefix_separate_false")
                (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
                               "def test_v1_0_arg_empty_prefix_separate_false")))
             #t)))))
    (propagated-inputs
     `(("python-argcomplete" ,python-argcomplete)
       ("python-bagit" ,python-bagit)
       ("python-coloredlogs" ,python-coloredlogs)
       ("python-mypy-extensions" ,python-mypy-extensions)
       ("python-prov" ,python-prov)
       ("python-pydot" ,python-pydot)
       ("python-psutil" ,python-psutil)
       ("python-rdflib" ,python-rdflib)
       ("python-requests" ,python-requests)
       ("python-ruamel.yaml" ,python-ruamel.yaml)
       ("python-schema-salad" ,python-schema-salad)
       ("python-shellescape" ,python-shellescape)
       ("python-typing-extensions" ,python-typing-extensions)
       ;; Not listed as needed but still necessary:
       ("node" ,node)))
    (native-inputs
     `(("python-arcp" ,python-arcp)
       ("python-humanfriendly" ,python-humanfriendly)
       ("python-mock" ,python-mock)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-mock" ,python-pytest-mock)
       ("python-pytest-runner" ,python-pytest-runner)
       ("python-rdflib-jsonld" ,python-rdflib-jsonld)))
    (home-page
     "https://github.com/common-workflow-language/common-workflow-language")
    (synopsis "Common Workflow Language reference implementation")
    (description
     "This is the reference implementation of the @acronym{CWL, Common Workflow
Language} standards. The CWL open standards are for describing analysis
workflows and tools in a way that makes them portable and scalable across a
variety of software and hardware environments, from workstations to cluster,
cloud, and high performance computing (HPC) environments. CWL is designed to
meet the needs of data-intensive science, such as Bioinformatics, Medical
Imaging, Astronomy, Physics, and Chemistry. The @acronym{cwltool, CWL reference
implementation} is intended to be feature complete and to provide comprehensive
validation of CWL files as well as provide other tools related to working with
CWL descriptions.")
    (license license:asl2.0)))
2720
;; DendroPy phylogenetics library, built with the Python build system.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source (origin
              (method git-fetch)
              ;; Source from GitHub so that tests are included.
              (uri (git-reference
                    (url "https://github.com/jeetsukumaran/DendroPy")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
    (build-system python-build-system)
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
2744
;; Python 2 variant of DendroPy.  It inherits from the result of
;; `package-with-python2' and adds a phase that renames one test so that it
;; is no longer picked up; the inherited arguments are appended after the
;; #:phases keyword.
(define-public python2-dendropy
  (let ((parent (package-with-python2 python-dendropy)))
    (package
      (inherit parent)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-failing-test
             (lambda _
               ;; This test fails when the full test suite is run, as
               ;; documented at
               ;; https://github.com/jeetsukumaran/DendroPy/issues/74
               (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                 (("test_collection_comments_and_annotations")
                  "do_not_test_collection_comments_and_annotations"))
               #t)))
         ,@(package-arguments parent))))))
2761
;; Python bindings for lib2bit, fetched from PyPI.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "py2bit" version))
              (sha256
               (base32
                "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2780
;; Delly structural variant caller.  The "src/htslib" directory is removed
;; from the source and the `htslib' input is used instead.
(define-public delly
  (package
    (name "delly")
    (version "0.8.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/dellytools/delly")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1ibnplgfzj96w8glkx17v7sld3pm402fr5ybmf3h0rlcryabxrqy"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "src/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags
       (list "PARALLEL=1"    ; Allow parallel execution at run-time.
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Copy the "excludeTemplates" directory into the output.
         (add-after 'install 'install-templates
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/share/delly/templates")))
               (mkdir-p destination)
               (copy-recursively "excludeTemplates" destination)
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2827
;; Tandem Repeats Finder, built from the upstream git tag with the GNU
;; build system and no custom arguments.
(define-public trf
  (package
    (name "trf")
    (version "4.09.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/Benson-Genomics-Lab/TRF")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0fhwr4s1mf8nw8fr5imwjvjr42b59p97zr961ifm8xl1bajz4wpg"))))
    (build-system gnu-build-system)
    (home-page "https://github.com/Benson-Genomics-Lab/TRF")
    (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
    (description "A tandem repeat in DNA is two or more adjacent, approximate
copies of a pattern of nucleotides. Tandem Repeats Finder is a program to
locate and display tandem repeats in DNA sequences. In order to use the
program, the user submits a sequence in FASTA format. The output consists of
two files: a repeat table file and an alignment file. Submitted sequences may
be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
bases are detected.")
    (license license:agpl3+)))
2851
;; RepeatMasker.  The whole source tree is copied to share/RepeatMasker in
;; the output and configured in place with the upstream "configure" Perl
;; script; the "RepeatMasker" program is then installed to bin and wrapped.
(define-public repeat-masker
  (package
    (name "repeat-masker")
    (version "4.1.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.repeatmasker.org/"
                                  "RepeatMasker/RepeatMasker-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "03144sl9kh5ni2i33phi7x2pjndzbm5bjw3r4kqvmm6hxyb4k4x2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((share (string-append (assoc-ref outputs "out")
                                         "/share/RepeatMasker")))
               (mkdir-p share)
               (copy-recursively "." share)
               ;; Run the configure script from inside the copied tree,
               ;; pointing it at the trf executable and the hmmer "bin"
               ;; directory.
               (with-directory-excursion share
                 (invoke "perl" "configure"
                         "--trf_prgm" (which "trf")
                         "--hmmer_dir"
                         (string-append (assoc-ref inputs "hmmer")
                                        "/bin"))))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/RepeatMasker"))
                    (bin   (string-append out "/bin"))
                    (path  (getenv "PERL5LIB")))
               (install-file (string-append share "/RepeatMasker") bin)
               ;; Prefix PERL5LIB with the build-time value and with the
               ;; share directory for the installed program.
               (wrap-program (string-append bin "/RepeatMasker")
                 `("PERL5LIB" ":" prefix (,path ,share)))))))))
    (inputs
     `(("perl" ,perl)
       ("perl-text-soundex" ,perl-text-soundex)
       ("python" ,python)
       ("python-h5py" ,python-h5py)
       ("hmmer" ,hmmer)
       ("trf" ,trf)))
    ;; The home page, synopsis and description describe RepeatMasker itself
    ;; rather than the Tandem Repeats Finder it depends on.
    (home-page "https://www.repeatmasker.org/")
    (synopsis "Screen DNA sequences for interspersed repeats")
    (description "RepeatMasker is a program that screens DNA sequences for
interspersed repeats and low complexity DNA sequences.  The output of the
program is a detailed annotation of the repeats that are present in the query
sequence as well as a modified version of the query sequence in which all the
annotated repeats have been masked.")
    (license license:osl2.1)))
2907
;; DIAMOND sequence aligner, built with CMake.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.30")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/bbuchfink/diamond")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Strip "-march=native" from the CMake build description.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
2941
;; Discrover motif discovery tool, built with CMake.  The texlive and
;; imagemagick packages are build-time (native) inputs.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/maaskola/discrover")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-latex-errors
           (lambda _
             ;; The bibliography is edited with the default port encoding
             ;; unset.
             (with-fluids ((%default-port-encoding #f))
               (substitute* "doc/references.bib"
                 (("\\{S\\}illanp[^,]+,")
                  "{S}illanp{\\\"a}{\\\"a},")))
             ;; XXX: I just can't get pdflatex to not complain about these
             ;; characters.  They end up in the manual via the generated
             ;; discrover-cli-help.txt.
             (substitute* "src/hmm/cli.cpp"
               (("µ") "mu")
               (("η") "eta")
               (("≤") "<="))
             ;; This seems to be a syntax error.
             (substitute* "doc/discrover-manual.tex"
               (("theverbbox\\[t\\]") "theverbbox"))
             #t))
         (add-after 'unpack 'add-missing-includes
           (lambda _
             ;; Insert "#include <random>" right after the include guard of
             ;; each of these two headers.
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t))
         ;; FIXME: this is needed because we're using texlive-union, which
         ;; doesn't handle fonts correctly.  It expects to be able to generate
         ;; fonts in the home directory.
         (add-before 'build 'setenv-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)
       ("rmath-standalone" ,rmath-standalone)))
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-fonts-cm
                                        texlive-fonts-amsfonts
                                        texlive-latex-doi
                                        texlive-latex-examplep
                                        texlive-latex-hyperref
                                        texlive-latex-ms
                                        texlive-latex-natbib
                                        texlive-bibtex ; style files used by natbib
                                        texlive-latex-pgf ; tikz
                                        texlive-latex-verbatimbox)))
       ("imagemagick" ,imagemagick)))
    (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
3014
;; EIGENSOFT population genetics tools.  The "bin" directory shipped in the
;; source is emptied by the snippet and repopulated by the build.
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/DReichLab/EIG")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
              (modules '((guix build utils)))
              ;; Remove pre-built binaries.
              (snippet
               '(begin
                  (delete-file-recursively "bin")
                  (mkdir "bin")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure phase, but the Makefile is in a
         ;; sub-directory.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; The provided install target only copies executables to
         ;; the "bin" directory in the build root.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "../bin" ".*"))
               #t))))))
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("perl" ,perl)
       ("gfortran" ,gfortran "lib")))
    (home-page "https://github.com/DReichLab/EIG")
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    ;; The license of the eigensoft tools is Expat, but since it's
    ;; linking with the GNU Scientific Library (GSL) the effective
    ;; license is the GPL.
    (license license:gpl3+)))
3073
;; Entrez Direct.  The scripts are installed as-is; the "xtract" and
;; "rchive" commands are symlinks into the `edirect-go-programs' input.
(define-public edirect
  (package
    (name "edirect")
    (version "13.3.20200128")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                                  "/versions/" version
                                  "/edirect-" version ".tar.gz"))
              (sha256
               (base32
                "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file "Mozilla-CA.tar.gz")
                  (substitute* "rchive.go"
                    ;; This go library does not have any license.
                    (("github.com/fiam/gounidecode/unidecode")
                     "golang.org/rainycape/unidecode"))
                  #t))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (delete 'check)                ; simple check after install
         (add-after 'unpack 'patch-programs
           (lambda _
             ;; Ignore errors about missing xtract.Linux and rchive.Linux.
             (substitute* "pm-refresh"
               (("cat \\\"\\$target")
                "grep ^[[:digit:]] \"$target"))
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (go-bindir (string-append
                                (assoc-ref inputs "edirect-go-programs")
                                "/bin/")))
               ;; Scripts copied verbatim into the output.
               (for-each
                (lambda (script)
                  (install-file script bindir))
                '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
                  "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
                  "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
                  "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
                  "pm-index" "pm-invert" "pm-merge" "pm-promote"))
               ;; The Go programs are exposed under names without the
               ;; ".Linux" suffix.
               (symlink (string-append go-bindir "xtract.Linux")
                        (string-append bindir "/xtract"))
               (symlink (string-append go-bindir "rchive.Linux")
                        (string-append bindir "/rchive")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure everything can run in a pure environment.
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    ;; The output's own bin directory followed by the
                    ;; directories that hold the external tools.
                    (search-dirs
                     (cons (string-append out "/bin")
                           (map (lambda (tool)
                                  (dirname (which tool)))
                                '("sed" "gzip" "grep" "perl" "uname")))))
               (for-each
                (lambda (target)
                  (wrap-program target
                    (list "PERL5LIB" ":" 'prefix (list perl5lib)))
                  (wrap-program target
                    (list "PATH" ":" 'prefix search-dirs)))
                (find-files out ".")))
             #t))
         ;; Run the installed and wrapped edirect.pl once.
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (invoke (string-append (assoc-ref outputs "out")
                                    "/bin/edirect.pl")
                     "-filter" "-help")
             #t)))))
    (inputs
     `(("edirect-go-programs" ,edirect-go-programs)
       ("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl-xml-simple" ,perl-xml-simple)
       ("perl" ,perl)))
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (native-search-paths
     ;; Ideally this should be set for LWP somewhere.
     (list (search-path-specification
            (variable "PERL_LWP_SSL_CA_FILE")
            (file-type 'regular)
            (separator #f)
            (files '("/etc/ssl/certs/ca-certificates.crt")))))
    (license license:public-domain)))
3189
;; The Go programs shipped in the edirect source.  This package inherits
;; from `edirect', switches to the Go build system and replaces the inputs.
(define-public edirect-go-programs
  (package
    (inherit edirect)
    (name "edirect-go-programs")
    (build-system go-build-system)
    (arguments
     `(#:install-source? #f
       #:tests? #f                      ; No tests.
       #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
       #:phases
       (modify-phases %standard-phases
         (replace 'build
           (lambda* (#:key import-path #:allow-other-keys)
             ;; Every program is compiled with the same leading
             ;; "go build -v -x" arguments.
             (define (go-build . arguments)
               (apply invoke "go" "build" "-v" "-x" arguments))
             (with-directory-excursion (string-append "src/" import-path)
               (go-build "j2x.go")
               (go-build "t2x.go")
               (go-build "-o" "xtract.Linux" "xtract.go" "common.go")
               (go-build "-o" "rchive.Linux" "rchive.go" "common.go")
               (go-build "-o" "symbols.Linux" "s2p.go"))))
         (replace 'install
           (lambda* (#:key outputs import-path #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                   (build-dir (string-append "src/" import-path "/")))
               (for-each
                (lambda (program)
                  (format #t "installing ~a~%" program)
                  (install-file (string-append build-dir program) bindir))
                '("j2x" "t2x" "symbols.Linux" "xtract.Linux" "rchive.Linux"))
               #t))))))
    ;; Reset the input fields inherited from `edirect'.
    (native-inputs '())
    (propagated-inputs '())
    (inputs
     `(("go-github-com-fatih-color" ,go-github-com-fatih-color)
       ("go-github-com-fogleman-gg" ,go-github-com-fogleman-gg)
       ("go-github-com-gedex-inflector" ,go-github-com-gedex-inflector)
       ("go-github-com-golang-freetype" ,go-github-com-golang-freetype)
       ("go-github-com-klauspost-cpuid" ,go-github-com-klauspost-cpuid)
       ("go-github-com-pbnjay-memory" ,go-github-com-pbnjay-memory)
       ("go-github-com-surgebase-porter2" ,go-github-com-surgebase-porter2)
       ("go-golang-org-rainycape-unidecode" ,go-golang-org-rainycape-unidecode)
       ("go-golang-org-x-image" ,go-golang-org-x-image)
       ("go-golang-org-x-text" ,go-golang-org-x-text)))))
3233
;; Exonerate sequence alignment tool, built from the release tarball with
;; the GNU build system.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
3263
;; eXpress quantification tool, built with CMake against the bamtools,
;; boost and protobuf inputs.
(define-public express
  (package
    (name "express")
    (version "1.5.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/adarob/eXpress")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "18nb22n7x820fzjngf4qgyb3mspqkw7xyk7v7s5ps6wfrd8qwscb"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Turn off static Boost and point the bamtools include
               ;; directory at the input.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; Likewise for the bamtools library directory, and refer to
               ;; the shared protobuf library instead of the static one.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so")))
             #t))
         (add-after 'unpack 'remove-update-check
           (lambda _
             ;; Drop both the include and the call of the version check.
             (substitute* "src/main.cpp"
               (("#include \"update_check.h\"") "")
               (("check_version\\(PACKAGE_VERSION\\);") ""))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
3314
;; Express Beta Diversity.  The build runs from the "source" sub-directory;
;; the executable and the conversion script are installed by hand.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/dparks1134/ExpressBetaDiversity")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; Invoke the freshly built executable with "-u" as the check.
         (replace 'check
           (lambda _
             (invoke "../bin/ExpressBetaDiversity" "-u")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (file)
                           (install-file file bindir))
                         '("../scripts/convertToEBD.py"
                           "../bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
3352
;; FastTree.  The source is a single C file, so there is nothing to unpack
;; or configure; it is compiled twice, once plainly as "FastTree" and once
;; with OpenMP enabled as "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into the executable OUTPUT.  EXTRA-FLAGS are
             ;; placed before the flags shared by both variants.
             (define (build-variant output . extra-flags)
               (apply invoke "gcc"
                      (append extra-flags
                              (list "-O3"
                                    "-finline-functions"
                                    "-funroll-loops"
                                    "-Wall"
                                    "-o" output
                                    source
                                    "-lm"))))
             (build-variant "FastTree")
             (build-variant "FastTreeMP" "-DOPENMP" "-fopenmp")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
3408
;; FASTX-Toolkit, built from the release tarball with the GNU build system.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/agordon/fastx_toolkit/releases/download/"
                    version "/fastx_toolkit-" version ".tar.bz2"))
              (sha256
               (base32
                "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (native-inputs
     `(("gcc" ,gcc-6)                   ;; doesn't build with later versions
       ("pkg-config" ,pkg-config)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
3441
;; Flexbar, built with CMake.  The check and install phases are replaced
;; with hand-written ones.
(define-public flexbar
  (package
    (name "flexbar")
    (version "3.4.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/seqan/flexbar")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-tune-to-CPU
           (lambda _
             (substitute* "src/CMakeLists.txt"
               ((" -march=native") ""))
             #t))
         (replace 'check
           (lambda _
             ;; Put the current directory at the front of PATH before
             ;; running the test script from the source tree.
             (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
             (with-directory-excursion "../source/test"
               (invoke "bash" "flexbar_test.sh"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "flexbar"
                           (string-append (assoc-ref outputs "out") "/bin/"))
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "https://github.com/seqan/flexbar")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:bsd-3)))
3492
;; FragGeneScan: gene prediction in short, error-prone sequencing reads.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             ;; SHARE ends with a slash, so "train/" is appended to it
             ;; directly in both replacements below.
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to external commands by absolute file name.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX.  Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _
             (invoke "make" "clean")
             (invoke "make" "fgs")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share))
             #t))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run two of the examples with the
           ;; installed script and check that the output files of the first
           ;; one get created.  Skipped when tests are disabled.
           (lambda* (#:key outputs tests? #:allow-other-keys)
             (when tests?
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/"))
                      (frag (string-append bin "run_FragGeneScan.pl")))
                 ;; Test complete genome.
                 (invoke frag
                         "-genome=./example/NC_000913.fna"
                         "-out=./test2"
                         "-complete=1"
                         "-train=complete")
                 (unless (and (file-exists? "test2.faa")
                              (file-exists? "test2.ffn")
                              (file-exists? "test2.gff")
                              (file-exists? "test2.out"))
                   (error "Expected files do not exist."))
                 ;; Test incomplete sequences.
                 (invoke frag
                         "-genome=./example/NC_000913-fgs.ffn"
                         "-out=out"
                         "-complete=0"
                         "-train=454_30")))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
3582
;; Fxtract: extract sequences from FASTA/FASTQ files by subsequence.
(define-public fxtract
  ;; Commit of the separate 'util' repository that is copied into the source
  ;; tree before building.
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Replace the "util" directory of the checkout with the
           ;; separately fetched sources.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util")
                   (commit util-commit)))
             ;; Name the checkout after the upstream author ("ctskennerton").
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
3647
;; GEMMA: genome-wide efficient mixed model association.
(define-public gemma
  (package
    (name "gemma")
    (version "0.98.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/genetics-statistics/GEMMA")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p8a7kkfn1mmrg017aziy544aha8i9h6wd1x2dk3w2794wl33qb7"))
              (modules '((guix build utils)))
              ;; Remove the "contrib" directory from the source.
              (snippet
               '(begin
                  (delete-file-recursively "contrib")
                  #t))))
    (build-system gnu-build-system)
    (inputs
     `(("gsl" ,gsl)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (native-inputs
     `(("catch" ,catch-framework2-1)
       ("perl" ,perl)
       ("shunit2" ,shunit2)
       ("which" ,which)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'prepare-build
           (lambda* (#:key inputs #:allow-other-keys)
             ;; The install phase below picks up "bin/gemma"; make sure the
             ;; directory exists before building.
             (mkdir-p "bin")
             ;; Point the Makefile at our openblas.
             (substitute* "Makefile"
               (("/usr/local/opt/openblas")
                (assoc-ref inputs "openblas")))
             #t))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; 'make slow-check' expects shunit2-2.0.3.
               (with-directory-excursion "test"
                 (invoke "./test_suite.sh")))
             ;; Return #t whether or not the tests were run.
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "bin/gemma"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (home-page "https://github.com/genetics-statistics/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "@acronym{GEMMA, Genome-wide Efficient Mixed Model Association} provides a
standard linear mixed model resolver with application in @acronym{GWAS,
genome-wide association studies}.")
    (license license:gpl3)))
3706
;; GRIT: build and quantify transcript models from RNA-seq type assays.
(define-public grit
  (package
    (name "grit")
    (version "2.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/grit")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda _
             ;; Delete these C files to force fresh generation from pyx
             ;; sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             ;; setup.py refers to "Cython.Setup"; use "Cython.Build" instead.
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build"))
             #t)))))
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    ;; The canonical <http://grit-bio.org> home page times out as of 2020-01-21.
    (home-page "https://github.com/nboley/grit")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
3751
;; HISAT: spliced alignment of RNA-seq reads using hierarchical indexing.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32 "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         ;; Install every file whose name matches the hisat executable
         ;; pattern into bin/.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs
                    (find-files
                     "."
                     "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
3818
;; HISAT2: graph-based alignment of sequencing reads.
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
                           "/downloads/hisat2-" version "-source.zip"))
       (sha256
        (base32 "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure)
         ;; Replace the `date` invocation in the Makefile by a constant.
         (add-after 'unpack 'make-deterministic
           (lambda _
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         ;; Install the executables matching the hisat2 name pattern and the
         ;; HTML manual.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bindir) programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,pandoc)))             ; for documentation
    (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later.  The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
3876
;; HMMER: biosequence analysis with profile hidden Markov models.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.3.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://eddylab.org/software/hmmer/hmmer-" version ".tar.gz"))
              (sha256
               (base32
                "0s9wf6n0qanbx8qs6igfl3vyjikwbrvh4d9d6mv54yp3xysykzlj"))))
    (build-system gnu-build-system)
    ;; Both are only needed for the tests.
    (native-inputs
     `(("perl" ,perl)
       ("python" ,python)))
    ;; hmmer uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license license:bsd-3)))
3903
;; HTSeq: Python infrastructure for high-throughput sequencing data.
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32 "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://htseq.readthedocs.io/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
3929
;; Python 2 variant of htseq, derived with 'package-with-python2'.
(define-public python2-htseq (package-with-python2 htseq))
3932
;; HTSJDK: Java library for high-throughput sequencing file formats.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0")         ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries, but keep an empty "lib"
               ;; directory in place.
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Internet access
       #:jdk ,icedtea-8
       ;; Have the "all" target place the jars directly in the output.
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3982
;; Newer HTSJDK release, built without the upstream build.xml.
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              ;; Same "NAME-VERSION-checkout" file name as before, spelled
              ;; the way the other git checkouts in this file spell it.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         ;; Drop the upstream build.xml; the build is driven by #:jar-name
         ;; instead.
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
4025
4026 ;; This is needed for picard 2.10.3
;; HTSJDK 2.10.1; every field not listed here comes from java-htsjdk-latest.
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
4052
4053 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
4054 ;; recent version of java-htsjdk, which depends on gradle.
;; Picard: Java command line tools for high-throughput sequencing data.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" line)
              (string-append line "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       ;; Tell the build where our htsjdk lives and which version it is.
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk-jars (string-append (assoc-ref inputs "java-htsjdk")
                                               "/share/java/htsjdk/")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib") htsjdk-jars)))
             #t))
         ;; Drop "compile" from the dependencies of the "test" target.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
4132
4133 ;; This is needed for dropseq-tools
;; Picard 2.10.3, built from a generated build.xml with htsjdk on the
;; manifest class path.
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "picard.jar"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:jdk ,icedtea-8
       #:main-class "picard.cmdline.PicardCommandLine"
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             (chmod "build.xml" #o664)
             ;; Rewrite build.xml into build.xml.new: every <target> whose
             ;; name is "manifest" gets two extra tasks appended that add a
             ;; Class-Path entry to the manifest file.  All other nodes are
             ;; copied unchanged.
             (call-with-output-file "build.xml.new"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `((target . ,(lambda (tag . children)
                                  (let ((target-name ((sxpath '(name *text*))
                                                      (car children)))
                                        ;; FIXME: We're breaking the line
                                        ;; early with a dummy path to
                                        ;; ensure that the store reference
                                        ;; isn't broken apart and can still
                                        ;; be found by the reference
                                        ;; scanner.
                                        (msg (format #f
                                                     "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                                     ;; maximum line length is 70
                                                     (make-string 57 #\b)
                                                     (assoc-ref inputs "java-htsjdk"))))
                                    (if (member "manifest" target-name)
                                        `(,tag ,@children
                                               (replaceregexp
                                                (@ (file "${manifest.file}")
                                                   (match "\\r\\n\\r\\n")
                                                   (replace "${line.separator}")))
                                               (echo
                                                (@ (message ,msg)
                                                   (file "${manifest.file}")
                                                   (append "true"))))
                                        `(,tag ,@children)))))
                     (*default* . ,(lambda (tag . children) `(,tag ,@children)))
                     (*text* . ,(lambda (_ text) text))))
                  port)))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
4222
4223 ;; This is the last version of Picard to provide net.sf.samtools
;; Picard 1.113, including the IntelDeflater JNI library built from source.
;; Fields not listed here are inherited from java-picard.
(define-public java-picard-1.113
  (package
    (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       #:jdk ,icedtea-8
       #:ant ,ant/java8
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((ant-jar (string-append (assoc-ref inputs "ant")
                                           "/lib/ant.jar")))
               (substitute* "build.xml"
                 (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar") ant-jar)))
             #t))
         ;; Let "test" depend on "compile-tests" rather than "compile", and
         ;; let "compile" only build the main sources.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Make the installed JNI library the default value of the
         ;; "intel_deflater_so_path" property.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((deflater (string-append (assoc-ref outputs "out")
                                            "/lib/jni/libIntelDeflater.so")))
               (substitute* "src/java/net/sf/samtools/Defaults.java"
                 (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                  (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                                 deflater
                                 "\")"))))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one.  This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "lib/jni")
             (mkdir-p "jdk-src")
             ;; Unpack the JDK sources.
             (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                     "-xf" (assoc-ref inputs "jdk-src"))
             ;; Generate the JNI header for the IntelDeflater class.
             (invoke "javah" "-jni"
                     "-classpath" "classes"
                     "-d" "lib/"
                     "net.sf.samtools.util.zip.IntelDeflater")
             ;; Compile and link the shared library.
             (with-directory-excursion "src/c/inteldeflater"
               (invoke "gcc" "-I../../../lib" "-I."
                       (string-append "-I" (assoc-ref inputs "jdk")
                                      "/include/linux")
                       "-I../../../jdk-src/src/share/native/common/"
                       "-I../../../jdk-src/src/solaris/native/common/"
                       "-c" "-O3" "-fPIC" "IntelDeflater.c")
               (invoke "gcc" "-shared"
                       "-o" "../../../lib/jni/libIntelDeflater.so"
                       "IntelDeflater.o" "-lz" "-lstdc++"))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "ant" "all" make-flags)
             #t))
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni-dir (string-append (assoc-ref outputs "out")
                                           "/lib/jni")))
               (mkdir-p jni-dir)
               (install-file "lib/jni/libIntelDeflater.so" jni-dir)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant/java8)               ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
4341
;; FastQC is built with Ant from the upstream source zip and installed by hand.
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the jar names mentioned in build.xml into absolute
         ;; file names below the corresponding inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((in-input (lambda (label file)
                               (string-append (assoc-ref inputs label)
                                              file))))
               (substitute* "build.xml"
                 (("jbzip2-0.9.jar")
                  (in-input "java-jbzip2" "/share/java/jbzip2.jar"))
                 (("sam-1.103.jar")
                  (in-input "java-picard-1.113" "/share/java/sam-1.112.jar"))
                 (("cisd-jhdf5.jar")
                  (in-input "java-cisd-jhdf5" "/share/java/sis-jhdf5.jar"))))
             #t))
         ;; There is no installation target, so copy the files ourselves.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix   (assoc-ref outputs "out"))
                    (bindir   (string-append prefix "/bin"))
                    (datadir  (string-append prefix "/share/fastqc/"))
                    (launcher (string-append datadir "/fastqc")))
               (mkdir-p bindir)
               (mkdir-p datadir)
               (copy-recursively "bin" datadir)
               ;; Make the launcher script use the java executable from
               ;; the "java" input instead of a bare 'java'.
               (substitute* launcher
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod launcher #o555)
               ;; Expose the launcher under bin/.
               (symlink launcher (string-append bindir "/fastqc"))
               #t))))))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
4420
;; fastp is built straight from its Makefile; the 'configure' phase is removed.
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the directory passed as BINDIR before 'install' runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               #t))))))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
4456
;; Current htslib release; the older variants in this file inherit from it.
(define-public htslib
  (package
    (name "htslib")
    (version "1.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1mrq4mihzx37yqhj3sfz6da6mw49niia808bzsw2gkkgmadxvyng"))))
    (build-system gnu-build-system)
    (arguments
     ;; "--enable-gcs" and "--enable-s3" let htslib translate "gs://" and
     ;; "s3://" to regular https links.  Both options only work together
     ;; with "--enable-libcurl".
     `(#:configure-flags
       (list "--enable-gcs"
             "--enable-libcurl"
             "--enable-s3")))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("curl" ,curl)
       ("openssl" ,openssl)))
    (propagated-inputs
     ;; The pkg-config file refers to zlib as a required library.
     `(("zlib" ,zlib)))
    (home-page "https://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
4494
;; htslib pinned at version 1.9.  Only the version and the source differ
;; from 'htslib'; every other field is inherited.
(define-public htslib-1.9
  (package
    (inherit htslib)
    (name "htslib")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))))
4507
;; htslib pinned at version 1.3.1; this variable is not exported
;; ('define' rather than 'define-public').
;; This package should be removed once no packages rely upon it.
(define htslib-1.3
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
4521
;; IDR, fetched from git; the generated C file is removed from the source.
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       (snippet
        '(begin
           ;; Delete generated C code.
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    (arguments
     ;; The only test, "test_inv_cdf.py", exercises features that are no
     ;; longer part of this package.  It also asserts False, so the test
     ;; suite would always fail.
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
4557
;; Jellyfish with separate outputs for its Ruby and Python bindings.
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; One flag per language binding, each pointing at the output of
       ;; the same name.
       (map (lambda (binding)
              (string-append (car binding)
                             (assoc-ref %outputs (cdr binding))))
            '(("--enable-ruby-binding=" . "ruby")
              ("--enable-python-binding=" . "python")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp runs the tests through either
             ;; /bin/sh or $SHELL, so point $SHELL at bash.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; The combined work is published under the GPLv3 or later. Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
4611
;; khmer, built against the zlib, bzip2 and seqan inputs rather than the
;; copies bundled under third-party/.
(define-public khmer
  (package
    (name "khmer")
    (version "3.0.0a3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled zlib, bzip2 and seqan.
           ;; NOTE(review): an earlier comment here said the bundled seqan
           ;; is kept because it is a modified subset of version 1.4.1, yet
           ;; the directory is deleted below and "seqan" is an input --
           ;; confirm which is intended.
           ;;
           ;; We do not replace the bundled MurmurHash as the canonical
           ;; repository for this code 'SMHasher' is unsuitable for providing
           ;; a library.  See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (for-each delete-file-recursively
                     '("third-party/zlib"
                       "third-party/bzip2"
                       "third-party/seqan"))
           ;; Enable linking against the z and bz2 libraries and drop the
           ;; bundled directories from the include path.
           (substitute* "setup.cfg"
             (("# libraries = z,bz2")
              "libraries = z,bz2")
             (("include:third-party/zlib:third-party/bzip2")
              "include:"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; The 'reset-gzip-timestamps' phase can only do its work on
         ;; writable .gz files, so make them writable first.
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             (for-each make-file-writable
                       (find-files (assoc-ref outputs "out") "\\.gz$"))
             #t)))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("seqan" ,seqan-1)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
4681
;; Kaiju is built from its "src" directory; the contents of the top-level
;; "bin" directory are then copied into the output.
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
    (build-system gnu-build-system)
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               ;; Leave "src" again: the "bin" directory to copy sits one
               ;; level up.
               (chdir "..")
               (copy-recursively "bin" bindir))
             #t)))))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
4719
;; MACS, fetched from git with Cython-generated C files removed.
(define-public macs
  (package
    (name "macs")
    (version "2.2.6")
    (source
     (origin
       ;; The PyPi tarball does not contain tests.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/macs3-project/MACS")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1c5gxr0mk6hkd4vclf0k00wvyvzw2vrmk52c85338p7aqjwg6n15"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove files generated by Cython: for every "X.pyx" delete
           ;; the sibling "X.c" when it exists.
           (let* ((pyx->c (lambda (file)
                            (string-append (string-drop-right file 3) "c")))
                  (candidates (map pyx->c (find-files "." "\\.pyx$"))))
             (for-each delete-file
                       (filter file-exists? candidates)))
           (delete-file "MACS2/IO/CallPeakUnitPrecompiled.c")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run pytest with the installed package on the Python path.
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v"))
             #t)))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    (inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/macs3-project/MACS")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
4771
;; MAFFT (the "without extensions" source), built from the "core" directory.
;; The Makefile is patched so that mafft-profile, mafft-distance and
;; mafft-homologs.rb are not distributed, and the installed programs are
;; wrapped so that coreutils is on their PATH.
(define-public mafft
  (package
    (name "mafft")
    (version "7.475")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0i2i2m3blh2xkbkdk48hxfssks30ny0v381gdl7zwhcvp0axs26r"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name
               ;; "f2cl" exactly as the replacement does; with the "f"
               ;; missing it cannot match a line containing "f2cl" and
               ;; mafft-profile is left in PROGS.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; Comment out the removal of the programs we no longer
               ;; build.  Both lines are replaced by a bare "#"; a stray
               ;; ")" in front of it would not be a valid Makefile line.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         ;; Replace bare tool names in the sources with the absolute file
         ;; names found on PATH at build time.
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         ;; Wrap every installed program so that the coreutils tools are
         ;; found first on PATH.
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; No trailing ":" here: 'wrap-program' inserts the
                    ;; separator itself, and a trailing one would add an
                    ;; empty PATH entry, i.e. the current directory.
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "https://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "https://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
4857
;; Mash, built against the kseq.h header from htslib instead of the
;; bundled copy.
(define-public mash
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled kseq.
           ;; TODO: Also delete bundled murmurhash and open bloom filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:configure-flags
       ;; Each flag is followed by the directory of the named input.
       (map (lambda (flag)
              (string-append (car flag)
                             (assoc-ref %build-inputs (cdr flag))))
            '(("--with-capnp=" . "capnproto")
              ("--with-gsl=" . "gsl")))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; The bundled kseq.h is deleted from the source, so include the
         ;; header under htslib/ instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; capnproto 0.7 requires c++14 to build
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application. Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
4923
;; MetaBAT, built with SCons against the samtools, htslib and boost inputs.
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
             (string-append "BOOST_ROOT=" (assoc-ref %build-inputs "boost")))
       #:tests? #f ;; Tests are run during the build phase.
       #:phases
       (modify-phases %standard-phases
         ;; Include the samtools and htslib headers under the "samtools/"
         ;; and "htslib/" prefixes instead of "bam/".
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Make SConstruct use the directories of the htslib and samtools
         ;; inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-dir   (assoc-ref inputs "htslib"))
                   (samtools-dir (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib-dir "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools-dir "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools-dir
                                 "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
4996
;; MinCED: the JAR is installed together with a small generated launcher.
(define-public minced
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bindir   (string-append (assoc-ref outputs "out")
                                             "/bin"))
                    (launcher (string-append bindir "/minced")))
               (install-file "minced.jar" bindir)
               ;; Upstream ships a wrapper script that works out its own
               ;; location before running the JAR.  We already know every
               ;; path involved, so write our own wrapper and avoid a
               ;; dependency on coreutils.
               (call-with-output-file launcher
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bindir "/minced.jar \"$@\"\n")
                    port)))
               (chmod launcher #o555))
             #t)))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
5053
;; MISO, from the "misopy" PyPI tarball, built with Python 2.
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32
         "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Without setuptools the executables are not installed.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "https://www.genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
5101
;; MUSCLE: no configure script, no test suite and no install target, so the
;; corresponding phases are removed or replaced.
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append
             "http://www.drive5.com/muscle/muscle_src_"
             version ".tar.gz"))
       (sha256
        (base32
         "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; There are no tests, so just test if it runs.
         (replace 'check
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         ;; Copy the built executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
5136
;; Packaged from a git commit because there are no recent releases.  The
;; version string embeds the first eight characters of the commit.
(define-public newick-utils
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." (substring commit 0 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
5170
;; OrfM, built from the upstream release tarball with the default phases of
;; the GNU build system.
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
5195
;; pbcore for Python 2, from the PyPI tarball.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2             ;pbcore < 2.0 requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-sphinx-dependency
           (lambda _
             ;; Only the documentation tests need Sphinx, and we do not
             ;; run those.  It would also pull in python2-sphinx, which is
             ;; no longer maintained.
             (substitute* "requirements-dev.txt"
               (("^sphinx") ""))
             #t)))))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "https://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
5232
;; WarpedLMM for Python 2; the PyPI release is distributed as a zip archive,
;; hence the "unzip" native input.
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ;requires Python 2.7
       #:tests? #f                      ;test data are not included
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-weave
           (lambda _
             ;; Import "weave" as a top-level module instead of taking it
             ;; from scipy; the python2-weave input provides it.
             (substitute* "warpedlmm/util/linalg.py"
               (("from scipy import linalg, weave")
                "from scipy import linalg\nimport weave"))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)
       ("python-weave" ,python2-weave)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
5275
;; pbtranscript-tofu is built from a pinned commit of the cDNA_primer
;; repository; the version string embeds the first seven characters of it.
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; The checkout bundles a Cython source tarball; drop it.
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; The Python package lives in a sub-directory of the checkout.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; This setup.py hack triggers a build error with setuptools 18.0
           ;; and later, so turn its condition into a constant false.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
5332
;; PRANK multiple sequence aligner.  The tarball has no configure script and
;; no usable install target, so the binary and man page are installed by hand
;; and the binary is wrapped so that it finds its helper programs.
(define-public prank
  (package
    (name "prank")
    (version "170427")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://wasabiapp.org/download/prank/prank.source."
                    version ".tgz"))
              (sha256
               (base32
                "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Both phases are added right after 'unpack, so this one ends up
         ;; running *before* 'enter-src-dir; that is why the Makefile is
         ;; addressed relative to the top of the source tree.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           ;; Look the helper programs up in the INPUTS passed to the phase
           ;; rather than in the global '%build-inputs', as the other
           ;; packages in this file do.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (bin  (string-append out "/bin"))
                    (man  (string-append out "/share/man/man1"))
                    ;; Directories holding the programs prank runs.
                    (path (string-append
                           (assoc-ref inputs "mafft") "/bin:"
                           (assoc-ref inputs "exonerate") "/bin:"
                           (assoc-ref inputs "bppsuite") "/bin")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
5390
;; Proteinortho, built from the upstream Git tag after removing the pre-built
;; scripts shipped in the repository.
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "6.0.14")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Get rid of the pre-built scripts.
           (delete-file-recursively "src/BUILD/")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; Without a configure script, the install location has to be
           ;; written into the Makefile itself.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; 'make install' does not create its target directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin")))
             #t))
         (add-after 'install 'wrap-programs
           ;; Wrap the main program and every installed Perl or Python
           ;; script so that they run with the build-time PATH.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out         (assoc-ref outputs "out"))
                    (search-path (getenv "PATH"))
                    (scripts     (cons (string-append out "/bin/proteinortho")
                                       (find-files out "\\.(pl|py)$"))))
               (for-each (lambda (script)
                           (wrap-script script
                             `("PATH" ":" prefix (,search-path))))
                         scripts))
             #t)))))
    (native-inputs
     `(("which" ,which)))
    (inputs
     `(("guile" ,guile-3.0)             ;for wrap-script
       ("diamond" ,diamond)
       ("perl" ,perl)
       ("python" ,python-wrapper)
       ("blast+" ,blast+)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl3+)))
5456
;; Pyicoteo, a Python 2 only tool suite, built from the upstream Git tag.
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; does not work with Python 3
       #:tests? #f))      ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the first paragraph must match the number of @item
    ;; entries in the enumeration below (currently seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
5495
;; Prodigal gene predictor; a plain Makefile project without a configure
;; script or a check target.
(define-public prodigal
  (package
    (name "prodigal")
    ;; When updating, look for a new home page; see
    ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; Point the Makefile's INSTALLDIR at the "bin" directory of the
       ;; package output.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/hyattpd/Prodigal")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
5528
;; Roary pan genome pipeline, taken from CPAN.  Nothing is compiled: the
;; scripts and Perl modules are copied into the output and every program is
;; wrapped so that it finds its Perl modules, R libraries and helper tools.
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The scripts and modules of the source tree are
             ;; put first on PATH and PERL5LIB for that purpose.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))
             #t))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Every program whose name does not end in "R" gets both
               ;; variables from a single wrapper.  The names come from the
               ;; source tree's "bin" directory, hence the OUT prefix.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script needs the R library path and
               ;; coreutils instead.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "https://sanger-pathogens.github.io/Roary/")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
5645
;; RAxML, built with the hybrid (MPI) Makefile.  The Makefile has no install
;; target, so the executable is installed by hand together with a "raxml"
;; convenience symlink.
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Create the link inside BIN.  A bare "raxml" link name
               ;; would be resolved against the current (build) directory
               ;; and never reach the package output.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
5686
;; RSEM, built against the packaged Boost and htslib instead of the copies
;; bundled in the repository.
(define-public rsem
  (package
    (name "rsem")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled Boost and samtools sources.
           (delete-file-recursively "boost")
           (delete-file-recursively "samtools-1.3")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Tell the Makefile where the external Boost headers and the htslib
       ;; header and shared library live.
       #:make-flags
       (let ((boost  (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "BOOST=" boost "/include/")
               (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
               (string-append "SAMLIBS=" htslib "/lib/libhts.so")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  Use this phase to edit the
         ;; Makefile so that the bundled samtools library is not built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               (("^\\$\\(SAMLIBS\\).*") ""))
             #t))
         (replace 'install
           ;; Copy every file whose name matches "rsem-.*" to bin/, and the
           ;; Perl helper module to the site_perl directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (bindir   (string-append out "/bin/"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perl-dir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perl-dir))
             #t))
         (add-after 'install 'wrap-program
           ;; Make the listed programs find rsem_perl_utils.pm.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out       (assoc-ref outputs "out"))
                    (site-perl (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (program)
                           (wrap-program (string-append out "/bin/" program)
                             `("PERL5LIB" ":" prefix (,site-perl))))
                         '("rsem-calculate-expression"
                           "rsem-control-fdr"
                           "rsem-generate-data-matrix"
                           "rsem-generate-ngvector"
                           "rsem-plot-transcript-wiggles"
                           "rsem-prepare-reference"
                           "rsem-run-ebseq"
                           "rsem-run-prsem-testing-procedure")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("htslib" ,htslib-1.3)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
5775
;; RSeQC, fetched from SourceForge and built with the Python build system
;; using its default phases.
(define-public rseqc
  (package
    (name "rseqc")
    (version "3.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-cython" ,python-cython)
       ("python-bx-python" ,python-bx-python)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
5808
(define-public seek
  ;; Upstream publishes no release tarballs.  The installation instructions
  ;; at http://seek.princeton.edu/installation.jsp name this changeset ID as
  ;; the latest stable release.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         (let* (;; Tools under "tools/" that get their own make runs after
                ;; the standard 'build and 'install phases.
                (tools '("SeekMiner"
                         "SeekEvaluator"
                         "SeekPrep"
                         "Distancer"
                         "Data2DB"
                         "PCL2Bin"))
                ;; Run "make" with MAKE-ARGUMENTS inside each tool directory.
                (make-in-each-tool
                 (lambda (make-arguments)
                   (for-each (lambda (tool)
                               (with-directory-excursion
                                   (string-append "tools/" tool)
                                 (apply invoke "make" make-arguments)))
                             tools))))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 ;; Substitute the absolute perl file name for the
                 ;; "/usr/bin/env" reference, then generate the build
                 ;; system with the upstream script.
                 (substitute* "gen_tools_am"
                   (("/usr/bin/env.*") (which "perl")))
                 (invoke "bash" "gen_auto")
                 #t))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (make-in-each-tool make-flags)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (make-in-each-tool (append make-flags '("install")))
                 #t))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
5881
;; Current samtools release, built against the packaged htslib.  Besides the
;; regular installation, the static libbam library and the C headers are
;; installed as well.
(define-public samtools
  (package
    (name "samtools")
    (version "1.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1dp5wknak4arnw5ghhif9mmljlfnw5bgm91wib7z0j8wdjywx0z2"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The tarball bundles htslib; remove that copy.
           (delete-file-recursively "htslib-1.11")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; test.pl invokes /bin/bash; use the absolute file name of
             ;; the bash found on PATH instead.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (libdir (string-append out "/lib")))
               (install-file "libbam.a" libdir)
               #t)))
         (add-after 'install 'install-headers
           ;; Install the header files found directly in the top-level
           ;; build directory (sub-directories are not searched).
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out        (assoc-ref outputs "out"))
                    (includedir (string-append out "/include/samtools/"))
                    (header?    (lambda (name)
                                  (string-match "\\.h$" name))))
               (for-each (lambda (header)
                           (install-file header includedir))
                         (scandir "." header?))
               #t))))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5943
;; samtools 1.9: same recipe as "samtools", with the matching source tarball
;; and htslib 1.9 in place of the current htslib.
(define-public samtools-1.9
  (package
    (inherit samtools)
    (name "samtools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The tarball bundles htslib; remove that copy.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (inputs
     `(("htslib" ,htslib-1.9)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))))
5968
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and many
  ;; bioinformatics pipelines still depend on it.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     ;; Start from the arguments of "samtools" and adjust its phases: there
     ;; is no configure step, no test patching, and only the "samtools"
     ;; executable is installed by the 'install phase.
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "LIBCURSES=-lncurses")
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:phases inherited-phases)
            `(modify-phases ,inherited-phases
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out    (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t)))
               (delete 'patch-tests)
               (delete 'configure))))))))
5999
;; MOSAIK read mapper, built from a pinned commit of the upstream repository.
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; Upstream provides neither release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32 "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, just move into "src",
           ;; where the build takes place.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy the contents of the top-level "bin" directory (one level
           ;; up from "src") into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK itself is GPLv2+; the following third-party code is in the
      ;; public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
6047
;; NGS SDK, built from the "ngs-sdk" sub-directory of the NCBI ngs
;; repository with its own configure script.
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "2.10.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ngs")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ix51c25hjn57w93qmwzw80xh2i34wx8j2hn7szh8p6w8i3az5qa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ;not supported
       #:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Build inside the SDK's sub-directory.  The phase keeps the name
         ;; 'enter-dir, which packages inheriting from this one replace.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "ngs-sdk")
             #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               ;; Put the current directory on the Perl module path so that
               ;; 'konfigure.perl' can find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; Call 'configure' by hand: it does not recognize flags
               ;; such as '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" prefix))
               #t))))))
    (native-inputs
     `(("perl" ,perl)))
    ;; Restricted to x86 because of this test in
    ;; ngs-sdk/setup/konfigure.perl:
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
6095
;; Java bindings of the NGS SDK: the ngs-sdk recipe with the 'enter-dir
;; phase pointed at the "ngs-java" sub-directory instead.
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Prepend a #:modules setting to the arguments of ngs-sdk, then
     ;; rewrite the inherited #:phases.
     (substitute-keyword-arguments
         (append '(#:modules ((guix build gnu-build-system)
                              (guix build utils)
                              (srfi srfi-1)
                              (srfi srfi-26)))
                 (package-arguments ngs-sdk))
       ((#:phases sdk-phases)
        `(modify-phases ,sdk-phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
6113
;; The NCBI-VDB library.  Besides the regular installation, extra phases
;; install the interface libraries ("ilib"), the interface headers and the
;; "kfg" configuration files.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ncbi-vdb")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0m8hlxscidsfqm9x9fyi62q6lpf1dv5115kgjjgnrkl49q9c27m6"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags '("HAVE_HDF5=1")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _
             (for-each make-file-writable (find-files "." ".*"))
             #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (let ((setup-dir (string-append (getcwd) "/setup:")))
               (setenv "PERL5LIB"
                       (string-append setup-dir (getenv "PERL5LIB"))))
             #t))
         ;; See https://github.com/ncbi/ncbi-vdb/issues/14
         (add-after 'unpack 'patch-krypto-flags
           (lambda _
             (substitute* "libs/krypto/Makefile"
               (("-Wa,-march=generic64\\+aes") "")
               (("-Wa,-march=generic64\\+sse4") ""))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out      (assoc-ref outputs "out"))
                   (libmagic (assoc-ref inputs "libmagic"))
                   (java-ngs (assoc-ref inputs "java-ngs")))
               (substitute* "setup/package.prl"
                 ;; Override include path for libmagic
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 libmagic "/include'"))
                 ;; Override search path for ngs-java
                 (("/usr/local/ngs/ngs-java")
                  java-ngs))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Append "-msse2" right after the "CFLAGS =" assignment.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))
               #t)))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir ilib)
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include")))
             #t))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((kfg-dir (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir kfg-dir)
               (for-each (lambda (file)
                           (install-file file kfg-dir))
                         '("libs/kfg/default.kfg"
                           "libs/kfg/certs.kfg")))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
6231
;; PLINK 1.07, built from the upstream zip archive with dynamic linking
;; against LAPACK and with the web check switched off.
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Only the "plink" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "plink"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
6284
;; PLINK 1.9, built from the "1.9" subdirectory of the plink-ng repository
;; using "Makefile.std".
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02npdwgkpfkdnhw819rhj5kw02a5k5m90b14zq9zzya4hyg929c0"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "BLASFLAGS=-llapack -lopenblas"
             "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
             "ZLIB=-lz"
             "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (delete 'configure)            ; no "configure" script
         ;; Only the "plink" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "plink"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
6322
;; Smithlab CPP has no install target: the object files are copied to "lib"
;; and the headers to "include/smithlab-cpp" by a custom 'install phase.
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 ;; The dot is escaped so that only the literal file name
                 ;; "sam.h" is rewritten; an unescaped dot would also match
                 ;; identifiers such as "sam_h...".
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (lib     (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 ;; Object files go to "lib", headers to the include
                 ;; directory.
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
6375
;; preseq is built against the packaged samtools and smithlab-cpp; the
;; bundled samtools copy is removed from the source by the snippet.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/preseq/"
                           "releases/download/v" version
                           "/preseq_v" version ".tar.bz2"))
       (sha256
        (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled samtools.
           (delete-file-recursively "samtools")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       (let ((out      (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
6425
;; The test suite needs the installed "screed" command, so the 'check phase
;; is moved after 'install.
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Tests must be run after installation, as the "screed" command does
         ;; not exist right after building.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honor '#:tests?' so that the tests can be disabled.
             (when tests?
               ;; Let the build system compute the site-packages directory
               ;; of the output instead of slicing the Python version out of
               ;; the interpreter's store file name, which only works for
               ;; five-character versions such as "3.8.2".
               (add-installed-pythonpath inputs outputs)
               (setenv "PATH"
                       (string-append (assoc-ref outputs "out") "/bin:"
                                      (getenv "PATH")))
               (invoke "python" "setup.py" "test"))
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
6469
;; Python 2 variant, derived from PYTHON-SCREED with 'package-with-python2'.
(define-public python2-screed (package-with-python2 python-screed))
6472
;; The SRA Toolkit, configured against the installed NCBI-VDB package
;; rather than against an NCBI-VDB source and build tree.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1cr2mijkfs5sm35ffjs6861qsd1qkgnhnbavdv65zg5d655abbjf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               ;; The library directory is "lib64" on x86_64 systems and
               ;; "lib32" otherwise.
               (string-append "VDB_LIBDIR=" vdb
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (let ((setup-dir (string-append (getcwd) "/setup:")))
               (setenv "PERL5LIB"
                       (string-append setup-dir (getenv "PERL5LIB"))))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (vdb (assoc-ref inputs "ncbi-vdb")))
               ;; The build system expects a directory containing the sources
               ;; and raw build output of ncbi-vdb, including files that are
               ;; not installed.  Since we are building against an installed
               ;; version of ncbi-vdb, the following modifications are needed.
               (substitute* "setup/konfigure.perl"
                 ;; Make the configure script look for the "ilib" directory of
                 ;; "ncbi-vdb" without first checking for the existence of a
                 ;; matching library in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Dynamic linking
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))
               (substitute* "tools/driver-tool/utf8proc/Makefile"
                 (("CC\\?=gcc") "myCC=gcc")
                 (("\\(CC\\)") "(myCC)"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" vdb)
                       (string-append "--with-ncbi-vdb-build=" vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5-1.10)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (home-page
     "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
6577
;; SeqAn is installed by unpacking the release tarball and copying its
;; "include" and "share" directories into the "out" and "doc" outputs.
(define-public seqan
  (package
    (name "seqan")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/seqan/seqan/releases/"
                           "download/seqan-v" version
                           "/seqan-library-" version ".tar.xz"))
       (sha256
        (base32
         "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tarball  (assoc-ref %build-inputs "source"))
               (tar-bin  (string-append (assoc-ref %build-inputs "tar")
                                        "/bin"))
               (xz-bin   (string-append (assoc-ref %build-inputs "xz")
                                        "/bin"))
               (headers  (string-append (assoc-ref %outputs "out")
                                        "/include"))
               (docs     (string-append (assoc-ref %outputs "doc")
                                        "/share")))
           ;; "tar" needs "xz" on PATH to decompress the archive.
           (setenv "PATH" (string-append tar-bin ":" xz-bin))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" headers)
           (copy-recursively "share" docs)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))
    (home-page "https://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
6622
;; SeqAn 1.4.2.  Same installation procedure as SEQAN, but the release is a
;; bzip2-compressed tarball, so "bzip2" replaces "xz".
(define-public seqan-1
  (package
    (inherit seqan)
    (name "seqan")
    (version "1.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://packages.seqan.de/seqan-library/"
                           "seqan-library-" version ".tar.bz2"))
       (sha256
        (base32
         "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tarball   (assoc-ref %build-inputs "source"))
               (tar-bin   (string-append (assoc-ref %build-inputs "tar")
                                         "/bin"))
               (bzip2-bin (string-append (assoc-ref %build-inputs "bzip2")
                                         "/bin"))
               (headers   (string-append (assoc-ref %outputs "out")
                                         "/include"))
               (docs      (string-append (assoc-ref %outputs "doc")
                                         "/share")))
           ;; "tar" needs "bzip2" on PATH to decompress the archive.
           (setenv "PATH" (string-append tar-bin ":" bzip2-bin))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" headers)
           (copy-recursively "share" docs)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))))
6657
;; Seqmagick, fetched from PyPI and built with the stock Python build
;; system (no custom phases).
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.8.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "seqmagick" version))
              (sha256
               (base32
                "0pf98da7i59q47gwrbx0wjk6xlvbybiwphw80w7h4ydjj0579a2b"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-biopython" ,python-biopython)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
6683
;; Seqtk has neither a configure script nor an install target; the single
;; "seqtk" executable is copied to "bin" by a custom 'install phase.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/seqtk")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.  The check
           ;; honors '#:tests?' so that it is skipped when tests are
           ;; disabled (for instance when cross-compiling, where the freshly
           ;; built binary cannot be executed).
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./seqtk" "seq"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
6719
;; SNAP has no configure script and no install target; the "snap-aligner"
;; and "SNAPCommand" executables are copied to "bin" by a custom 'install
;; phase.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/amplab/snap")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; Run the unit test program, but only when tests are enabled so
           ;; that '#:tests? #f' (e.g. "--without-tests") is honored.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./unit_tests"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library for the target system, not a build-time tool, so it
    ;; belongs in 'inputs' rather than 'native-inputs'.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
6760
;; SortMeRNA installs its two executables into "out" and the bundled rRNA
;; FASTA databases into the separate "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biocore/sortmerna")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin"))
                   (db-dir  (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases")))
               ;; Executables.
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         '("sortmerna" "indexdb_rna"))
               ;; Sequence databases.
               (for-each (lambda (fasta)
                           (install-file fasta db-dir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6807
;; STAR is built from the "source" subdirectory against the packaged htslib;
;; the pre-built binaries and the bundled htslib are removed by the source
;; snippet.
(define-public star
  (package
    (name "star")
    (version "2.7.8a")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/alexdobin/STAR")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0zc5biymja9zml9yizcj1h68fq9c6sxfcav8a0lbgvgsm44rvans"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             ;; Replace the value of COMPILATION_TIME_PLACE with a fixed
             ;; string.
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             #t))
         ;; See https://github.com/alexdobin/STAR/pull/562
         (add-after 'enter-source-dir 'add-missing-header
           (lambda _
             (substitute* "SoloReadFeature_inputRecords.cpp"
               (("#include \"binarySearch2.h\"" h)
                (string-append h "\n#include <math.h>")))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the Depend.list target.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled headers into system
             ;; includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot before "h" is escaped, like in the pattern above, so
             ;; that only names ending in a literal ".h" are matched.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
6895
;; STAR 2.7.3a.  Everything except the version and the source is inherited
;; from STAR.
(define-public star-for-pigx
  (package
    (inherit star)
    (name "star")
    (version "2.7.3a")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/alexdobin/STAR")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Remove pre-built binaries and bundled htslib sources.
           (for-each delete-file-recursively
                     '("bin/MacOSX_x86_64"
                       "bin/Linux_x86_64"
                       "bin/Linux_x86_64_static"
                       "source/htslib"))
           #t))))))
6921
;; Variant of STAR that builds and installs the "STARlong" executable with a
;; larger DEF_readNameLengthMax.
(define-public starlong
  (package
    (inherit star)
    (name "starlong")
    (arguments
     (substitute-keyword-arguments (package-arguments star)
       ;; Build the "STARlong" make target instead of the inherited one.
       ((#:make-flags flags)
        `(list "STARlong"))
       ((#:phases star-phases)
        `(modify-phases ,star-phases
           ;; Allow extra long sequence reads.
           (add-after 'unpack 'make-extra-long
             (lambda _
               (substitute* "source/IncludeDefine.h"
                 (("(#define DEF_readNameLengthMax ).*" _ definition)
                  (string-append definition "900000\n")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "STARlong"
                             (string-append (assoc-ref outputs "out")
                                            "/bin/"))
               #t))))))))
6943
;; Subread is built in the "src" subdirectory with "Makefile.Linux"; the
;; resulting programs are copied from "../bin" into the output's "bin".
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/subread/subread-"
                           version "/subread-" version "-source.tar.gz"))
       (sha256
        (base32
         "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (let* ((x86-64? (string-prefix? "x86_64"
                                       ,(or (%current-target-system)
                                            (%current-system))))
              (generic '("-ggdb" "-fomit-frame-pointer"
                         "-ffast-math" "-funroll-loops"
                         "-fmessage-length=0"
                         "-O9" "-Wall" "-DMAKE_FOR_EXON"
                         "-DMAKE_STANDALONE"
                         "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
              (x86-only '("-mmmx" "-msse" "-msse2" "-msse3"))
              (ccflags (if x86-64?
                           (append generic x86-only)
                           generic)))
         (list (string-append "CCFLAGS=" (string-join ccflags))
               "-f" "Makefile.Linux"
               "CC=gcc ${CCFLAGS}"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p target)
               (copy-recursively "../bin" target))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
6998
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32 "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       ;; Remove the samtools sources shipped inside the tarball.
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Drop the libbam.a prerequisite from the "stringtie" rule.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Rewrite the quoted includes to <samtools/...> includes.
             (substitute* '("gclib/GBam.h" "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ stem)
                (string-append "#include <samtools/" stem ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         ;; Install the single "stringtie" executable into the "out" output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "stringtie"
                           (string-append (assoc-ref outputs "out") "/bin/"))
             #t)))))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
7060
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.11")
    (source
     (origin
       ;; The Pypi version does not include tests.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/fhcrc/taxtastic")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-sqlalchemy" ,python-sqlalchemy)
       ("python-decorator" ,python-decorator)
       ("python-biopython" ,python-biopython)
       ("python-pandas" ,python-pandas)
       ("python-psycopg2" ,python-psycopg2)
       ("python-fastalite" ,python-fastalite)
       ("python-pyyaml" ,python-pyyaml)
       ("python-six" ,python-six)
       ("python-jinja2" ,python-jinja2)
       ("python-dendropy" ,python-dendropy)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'prepare-directory
           (lambda _
             ;; The git checkout must be writable for tests.
             (for-each make-file-writable (find-files "."))
             ;; This test fails, but the error is not caught by the test
             ;; framework, so the tests fail...
             (substitute* "tests/test_taxit.py"
               (("self.cmd_fails\\(''\\)")
                "self.cmd_fails('nothing')"))
             ;; This version file is expected to be created with git
             ;; describe; write the package version into it instead.
             (mkdir-p "taxtastic/data")
             (call-with-output-file "taxtastic/data/ver"
               (lambda (port)
                 (display ,version port)))
             #t))
         (add-after 'unpack 'python37-compatibility
           (lambda _
             ;; Use the top-level "errno" module in place of "os.errno".
             (substitute* "taxtastic/utils.py"
               (("import csv") "import csv, errno")
               (("os.errno") "errno"))
             #t))
         (replace 'check
           ;; Note, this fails to run with "-v" as it tries to write to a
           ;; closed output stream.
           (lambda _
             (invoke "python" "-m" "unittest")
             #t)))))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
7121
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32 "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no "check" target
       ;; Both PREFIX and MANDIR point into the "out" output.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
7157
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32 "1z4mgwqg1j4n5ika08ai8mg9yjyjhf4821jp83v2bgwzxrykqjgr"))))
    (build-system gnu-build-system)
    ;; Both of these are only needed to run the tests.
    (native-inputs
     `(("perl" ,perl)
       ("python" ,python)))
    ;; Infernal requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description
     "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    (license license:bsd-3)))
7186
(define-public r-snapatac
  (package
    (name "r-snapatac")
    (version "2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/r3fang/SnapATAC")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "037jzlbl436fi7lkpq7d83i2vd1crnrik3vac2x6xj75dbikb2av"))))
    ;; The upstream R package name differs from the Guix package name.
    (properties `((upstream-name . "SnapATAC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bigmemory" ,r-bigmemory)
       ("r-doparallel" ,r-doparallel)
       ("r-dosnow" ,r-dosnow)
       ("r-edger" ,r-edger)
       ("r-foreach" ,r-foreach)
       ("r-genomicranges" ,r-genomicranges)
       ("r-igraph" ,r-igraph)
       ("r-iranges" ,r-iranges)
       ("r-irlba" ,r-irlba)
       ("r-matrix" ,r-matrix)
       ("r-plyr" ,r-plyr)
       ("r-plot3d" ,r-plot3d)
       ("r-rann" ,r-rann)
       ("r-raster" ,r-raster)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rtsne" ,r-rtsne)
       ("r-scales" ,r-scales)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/r3fang/SnapATAC")
    (synopsis "Single nucleus analysis package for ATAC-Seq")
    ;; Keep hyphenated words such as "large-scale" on a single line: a line
    ;; break after the hyphen is rendered as "large- scale".
    (description
     "This package provides a fast and accurate analysis toolkit for single
cell ATAC-seq (Assay for transposase-accessible chromatin using sequencing).
Single cell ATAC-seq can resolve the heterogeneity of a complex tissue and
reveal cell-type specific regulatory landscapes. However, the exceeding data
sparsity has posed unique challenges for the data analysis. This package
@code{r-snapatac} is an end-to-end bioinformatics pipeline for analyzing
large-scale single cell ATAC-seq data which includes quality control,
normalization, clustering analysis, differential analysis, motif inference and
exploration of single cell ATAC-seq sequencing data.")
    (license license:gpl3)))
7235
(define-public r-archr
  ;; Built from a specific upstream commit rather than a release tag.
  (let ((commit "46b519ffb6f73edf132497ac31650d19ef055dc1")
        (revision "1"))
    (package
      (name "r-archr")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/GreenleafLab/ArchR")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1zj3sdfhgn2q2256fmz61a92vw1wylyck632d7842d6knd0v92v8"))))
      (properties '((upstream-name . "ArchR")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ("r-chromvar" ,r-chromvar)
         ("r-complexheatmap" ,r-complexheatmap)
         ("r-data-table" ,r-data-table)
         ("r-genomicranges" ,r-genomicranges)
         ("r-ggplot2" ,r-ggplot2)
         ("r-ggrepel" ,r-ggrepel)
         ("r-gridextra" ,r-gridextra)
         ("r-gtable" ,r-gtable)
         ("r-gtools" ,r-gtools)
         ("r-magrittr" ,r-magrittr)
         ("r-matrix" ,r-matrix)
         ("r-matrixstats" ,r-matrixstats)
         ("r-motifmatchr" ,r-motifmatchr)
         ("r-nabor" ,r-nabor)
         ("r-plyr" ,r-plyr)
         ("r-rcpp" ,r-rcpp)
         ("r-rhdf5" ,r-rhdf5)
         ("r-rsamtools" ,r-rsamtools)
         ("r-s4vectors" ,r-s4vectors)
         ("r-stringr" ,r-stringr)
         ("r-summarizedexperiment" ,r-summarizedexperiment)
         ("r-uwot" ,r-uwot)))
      (synopsis "Analyze single-cell regulatory chromatin in R")
      (description
       "This package is designed to streamline scATAC analyses in R.")
      (home-page "https://github.com/GreenleafLab/ArchR")
      (license license:gpl2+))))
7284
(define-public r-scde
  (package
    (name "r-scde")
    (version "1.99.2")
    (source
     (origin
       (method git-fetch)
       ;; The upstream tag is the bare version string.
       (uri (git-reference
             (url "https://github.com/hms-dbmi/scde")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-mgcv" ,r-mgcv)
       ("r-rook" ,r-rook)
       ("r-rjson" ,r-rjson)
       ("r-cairo" ,r-cairo)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-edger" ,r-edger)
       ("r-quantreg" ,r-quantreg)
       ("r-nnet" ,r-nnet)
       ("r-rmtstat" ,r-rmtstat)
       ("r-extremes" ,r-extremes)
       ("r-pcamethods" ,r-pcamethods)
       ("r-biocparallel" ,r-biocparallel)
       ("r-flexmix" ,r-flexmix)))
    (synopsis "R package for analyzing single-cell RNA-seq data")
    (description
     "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
    (home-page "https://hms-dbmi.github.io/scde/")
    ;; See https://github.com/hms-dbmi/scde/issues/38
    (license license:gpl2)))
7326
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is fetched from R-Forge.
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (home-page "http://centipede.uchicago.edu/")
    (license (list license:gpl2+ license:gpl3+))))
7350
(define-public r-demultiplex
  ;; Built from a specific upstream commit rather than a release tag.
  (let ((commit "6e2a1422c8e6f418cfb271997eebc91f9195f299")
        (revision "1"))
    (package
      (name "r-demultiplex")
      (version (git-version "1.0.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "01kv88wp8vdaq07sjk0d3d1cb553mq1xqg0war81pgmg63bgi38w"))))
      (properties '((upstream-name . "deMULTIplex")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-kernsmooth" ,r-kernsmooth)
         ("r-reshape2" ,r-reshape2)
         ("r-rtsne" ,r-rtsne)
         ("r-shortread" ,r-shortread)
         ("r-stringdist" ,r-stringdist)))
      (synopsis "MULTI-seq pre-processing and classification tools")
      (description
       "deMULTIplex is an R package for analyzing single-cell RNA sequencing
data generated with the MULTI-seq sample multiplexing method. The package
includes software to

@enumerate
@item Convert raw MULTI-seq sample barcode library FASTQs into a sample
barcode UMI count matrix, and
@item Classify cell barcodes into sample barcode groups.
@end enumerate
")
      (home-page "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
      (license license:cc0))))
7389
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/torognes/vsearch")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources.  The vsearch source is adjusted
           ;; for this in the patch.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
7435
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    ;; The download URL therefore spells out the upstream version literally.
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       (uri "mirror://sourceforge/pardre/ParDRe-rel1.1.5.tar.gz")
       (sha256
        (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the "ParDRe" executable into the "out" output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "ParDRe"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
7476
(define-public ngshmmalign
  (package
    (name "ngshmmalign")
    (version "0.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/cbg-ethz/ngshmmalign/"
                           "releases/download/" version
                           "/ngshmmalign-" version ".tar.bz2"))
       (sha256
        (base32 "0jryvlssi2r2ii1dxnx39yk6bh4yqgq010fnxrgfgbaj3ykijlzv"))))
    (build-system cmake-build-system)
    ;; There are no tests to run.
    (arguments
     '(#:tests? #f))
    (inputs
     `(("boost" ,boost)))
    (synopsis "Profile HMM aligner for NGS reads")
    (description
     "ngshmmalign is a profile HMM aligner for NGS reads designed particularly
for small genomes (such as those of RNA viruses like HIV-1 and HCV) that
experience substantial biological insertions and deletions.")
    (home-page "https://github.com/cbg-ethz/ngshmmalign/")
    (license license:gpl2+)))
7501
(define-public prinseq
  (package
    (name "prinseq")
    (version "0.20.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/prinseq/standalone/"
                           "prinseq-lite-" version ".tar.gz"))
       (sha256
        (base32
         "0vxmzvmm67whxrqdaaamwgjk7cf0fzfs5s673jgg00kz7g70splv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; no check target
       #:phases
       (modify-phases %standard-phases
         ;; Nothing is configured or built; the scripts are installed as-is.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; Make each script executable, copy it to BIN and wrap the
               ;; installed copy so that it sees the build-time PERL5LIB.
               (for-each (lambda (file)
                           (chmod file #o555)
                           (install-file file bin)
                           (wrap-script (string-append bin "/" (basename file))
                             `("PERL5LIB" ":" prefix
                               (,(getenv "PERL5LIB")))))
                         ;; The pattern is anchored and its dot is escaped so
                         ;; that only "prinseq*.pl" files are picked up.
                         (find-files "." "^prinseq.*\\.pl$"))))))))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrapper scripts
       ("perl" ,perl)
       ("perl-cairo" ,perl-cairo)
       ("perl-data-dumper" ,perl-data-dumper)
       ("perl-digest-md5" ,perl-digest-md5)
       ("perl-getopt-long" ,perl-getopt-long)
       ("perl-json" ,perl-json)
       ("perl-statistics-pca" ,perl-statistics-pca)))
    (home-page "http://prinseq.sourceforge.net/")
    (synopsis "Preprocess sequence data in FASTA or FASTQ formats")
    (description
     "PRINSEQ is a bioinformatics tool to help you preprocess your genomic or
metagenomic sequence data in FASTA or FASTQ formats. The tool is written in
Perl and can be helpful if you want to filter, reformat, or trim your sequence
data. It also generates basic statistics for your sequences.")
    (license license:gpl3+)))
7549
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The tests are run through the "spec" target.
    (arguments
     '(#:test-target "spec"))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
7577
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    ;; rspec is only needed at build time.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
7601
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; rspec is a build-time test dependency, so it is declared as a native
    ;; input, as the other Ruby packages in this file do.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
7625
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; Replace the hard-coded command names used by the tests with
             ;; whatever `which' finds for them in the build environment.
             (substitute* '("test/functional/bio/test_command.rb")
               (("/bin/sh") (which "sh")))
             (substitute* '("test/functional/bio/test_command.rb")
               (("/bin/ls") (which "ls")))
             (substitute* '("test/functional/bio/test_command.rb")
               (("which") (which "which")))
             ;; There must be no comma between the file names: inside this
             ;; quasiquoted form a comma is an unquote, not a separator.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
7668
(define-public bio-vcf
  (package
    (name "bio-vcf")
    (version "0.9.5")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-vcf" version))
       (sha256
        (base32 "1glw5pn9s8z13spxk6yyfqaz80n9lga67f33w35nkpq9dwi2vg6g"))))
    (build-system ruby-build-system)
    ;; cucumber is only needed at build time.
    (native-inputs
     `(("ruby-cucumber" ,ruby-cucumber)))
    (home-page "https://github.com/vcflib/bio-vcf")
    (synopsis "Smart VCF parser DSL")
    (description
     "Bio-vcf provides a @acronym{DSL, domain specific language} for processing
the VCF format. Record named fields can be queried with regular expressions.
Bio-vcf is a new generation VCF parser, filter and converter. Bio-vcf is not
only very fast for genome-wide (WGS) data, it also comes with a filtering,
evaluation and rewrite language and can output any type of textual data,
including VCF header and contents in RDF and JSON.")
    (license license:expat)))
7693
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.32.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "1gaic8qf6a6sy0bmydh1xzf52w0wnq31aanpvw3a30pfsi218bcp"))))
    ;; The Bioconductor package name uses different capitalization.
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
7721
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32 "1sl0l6v05lfglj281nszma0h5k234md7rn2pdah8vs2d4iq3kimw"))))
    ;; The Bioconductor package name uses different capitalization.
    (properties '((upstream-name . "VariantAnnotation")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixgenerics" ,r-matrixgenerics)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (license license:artistic2.0)))
7759
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.46.0")
    (source
     (origin
       (method url-fetch)
       ;; Fetched from Bioconductor.
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32 "1xxv493q1kip9bjfv7v7k5dnq7hz7gvl80i983v4mvwavhgnbxfz"))))
    (build-system r-build-system)
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (home-page "http://bioinf.wehi.edu.au/limma")
    (license license:gpl2+)))
7778
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "1pqljikg4f6jb7wgm5537zwgq5b013nyz1agjrwfq2cljb0ym6lq"))))
    ;; The Bioconductor package name uses different capitalization.
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove every mention of zlibbioc from the package metadata;
             ;; zlib itself is provided as an input instead.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7814
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "0j4py5g6pdj35xhlaqhxxhg55j9l4mcdk3yck4dgyavv5f2dh24i"))))
    ;; The Bioconductor package name uses different capitalization.
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, which play a central role when it comes to
analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7845
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.50.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32 "11kgc4flywlm3i18603558l8ksv91c24vkc5fnnbcd375i2dhhd4"))))
    ;; The Bioconductor package name uses different capitalization.
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (home-page "https://bioconductor.org/packages/Biobase")
    (license license:artistic2.0)))
7867
;; AnnotationDbi from Bioconductor; knitr is only a native (build-time) input.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.52.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "0zqxgh3nx6y8ry12s2vss2f4axz5vpqxha1y4ifhhcx4zhpzsglr"))))
    (properties `((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7896
;; biomaRt from Bioconductor: client for BioMart databases.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.46.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "0gwmd0ykpv0gyh34c56g5m12lil20fvig49f3ih1jxrxf3q4wmq7"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-httr" ,r-httr)
       ("r-openssl" ,r-openssl)
       ("r-progress" ,r-progress)
       ("r-rappdirs" ,r-rappdirs)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)
       ("r-xml2" ,r-xml2)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL as its first argument and the displayed
    ;; text as its second.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7934
;; BiocParallel from Bioconductor.  The license field lists both GPL
;; variants given below.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.24.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "1iryicvmcagcrj29kp49mqhiq2kn72j4idj380hi9illmdrg9ism"))))
    (properties `((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+
                   license:gpl3+))))
7961
;; Biostrings from Bioconductor: string containers for biological sequences.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.58.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "1rbqhs73mhfr1gi0rx28jiyan7i3hb45ai3jpl1656fnrhgjfxq5"))))
    (properties `((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-crayon" ,r-crayon)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
7988
;; Rsamtools from Bioconductor: R interface to samtools, bcftools and tabix.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "2.6.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rsamtools" version))
              (sha256
               (base32
                "040pggkwglc6wy90qnc7xcdnaj0v3iqlykvvsl74241409qly554"))))
    (properties
     `((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove every mention of zlibbioc from the package metadata so
         ;; that the package no longer declares or imports it.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rhtslib" ,r-rhtslib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    ;; Use the short canonical Bioconductor URL, like the other Bioconductor
    ;; packages in this file.
    (home-page "https://bioconductor.org/packages/Rsamtools")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the @code{samtools},
@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence
Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed
tab-delimited (tabix) files.")
    (license license:expat)))
8031
;; DelayedArray from Bioconductor: delayed operations on array-like objects.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "1d75zrhha1v7dhbvjp6a4iap441l5k268w0jjxklpqywbqns7l3d"))))
    (properties `((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixgenerics" ,r-matrixgenerics)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
8064
;; SummarizedExperiment from Bioconductor: assay container keyed by genomic
;; ranges (rows) and samples (columns).
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "04x6d4mcsnvz6glkmf6k2cv3fs8zk03i9rvv0ahpl793n8l411ps"))))
    (properties `((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixgenerics" ,r-matrixgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
8098
;; GenomicAlignments from Bioconductor: containers for short genomic
;; alignments.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "1q95px6s6snsax4ax955zzpdlrwp5liwf70wqq0lrk9mp6lq0hbr"))))
    (properties `((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
8131
;; rtracklayer from Bioconductor: interface to genome browsers and their
;; annotation tracks.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.50.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "12zimhpdzjyzd81wrzz5hdbzvlgzcs22x1nnaf2jq4cba3ch5px8"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Delete the zlibbioc entry from DESCRIPTION and the matching
         ;; import from NAMESPACE right after unpacking.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    ;; NOTE(review): the 'use-system-zlib phase strips zlibbioc from the
    ;; package metadata, yet r-zlibbioc is still propagated below -- confirm
    ;; whether it is still needed.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
8179
;; GenomicFeatures from Bioconductor: tools for transcript centric
;; annotations.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.42.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "17dyd9hcw6pw16y353dh55wfhxmkxka99lbsxsp9xyrhffwrxi0s"))))
    (properties `((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
8222
;; GO.db: Bioconductor annotation package for the Gene Ontology.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.7.0")
    (source (origin
              (method url-fetch)
              ;; This is an annotation package, so ask bioconductor-uri for
              ;; the annotation location instead of spelling out the
              ;; "release" URL by hand.
              (uri (bioconductor-uri "GO.db" version 'annotation))
              (sha256
               (base32
                "0i3wcf5h3n0dawzc1hy0kv74f06j80c47n4p3g3fmrcxlhi3jpa5"))))
    (properties
     `((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
8246
;; topGO from Bioconductor: enrichment analysis for gene ontology terms.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "0vr3l9gvd3dhy446k3fkj6rm7z1abxi56rbnrs64297yzxaz1ngl"))))
    (properties `((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
8279
;; BSgenome from Bioconductor: shared infrastructure for Biostrings-based
;; genome data packages.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.58.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "1gbvmxr6r57smgvhqgwspbcnwyk4hsfkxkpzzcs6470q03zfb4wq"))))
    (properties `((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
8310
;; impute from Bioconductor; gfortran is a native (build-time) input.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.64.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "1pnjasw9i19nmxwjzrd9jbln31yc5jilfvwk414ya5zbqfsazvxa"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
8330
;; seqPattern from Bioconductor: visualisation of oligonucleotide patterns.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0j68n6fwycxjpl2va5fw7ajb123n758s2pq997d76dysxghmrlzq"))))
    (properties `((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
8357
;; genomation from Bioconductor: summary, annotation and visualization of
;; genomic intervals.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genomation" version))
              (sha256
               (base32
                "1ana06irlpdgnmk8mb329nws9sm8n6max4qargf1xdcdf3rnk45g"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides tools for the summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
8405
;; genomationData: Bioconductor experiment-data package accompanying
;; genomation.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              ;; This tarball is located under "data/experiment/" instead of
              ;; "bioc/", hence the 'experiment type.
              (uri (bioconductor-uri "genomationData" version 'experiment))
              (sha256
               (base32
                "0igjsvfnws3498j65ifniw0kbxfqpfr59rcjddqvq4zsj453fx1g"))))
    ;; The upstream name is mixed case, unlike the Guix package name.
    (properties
     `((upstream-name . "genomationData")))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8433
;; seqLogo from Bioconductor: sequence logos for DNA sequence alignments.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "02rpzjjfg5chlwwfbvv72cm78cg2vfmdwzars0cin9hz1hd7rnq1"))))
    (properties
     `((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8456
;; motifRG from Bioconductor: discriminative motif discovery.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.31.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32
                "1ml6zyzlk8yjbnfhga2qnw8nl43rankvka0kc1yljxr2b66aqbhn"))))
    (properties
     `((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8483
;; zlibbioc from Bioconductor; it has no inputs of its own.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "0m36ddss0znvm19dhnxcclxjhgjplw8ajk8v419h20ab8an6khxg"))))
    (properties `((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description
     "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8502
;; R4RNA: analysis framework for RNA secondary structure, fetched from the
;; project's own site rather than from Bioconductor.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       ;; Fetch over HTTPS, matching the scheme of the home page on the same
       ;; host.
       (uri (string-append "https://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8525
;; Rhtslib from Bioconductor: the HTSlib C library packaged for R.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "18wag2jnpda6078xjkpfdvar1gkb2myhw83gg03l39sabh35qya4"))))
    (properties
     `((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    (arguments
     ;; A staged install would leave a temporary directory name inside the
     ;; Rhtslib.so binary, and R aborts the build when that happens.
     '(#:configure-flags
       '("--no-staged-install")))
    (propagated-inputs
     `(("curl" ,curl)
       ;; Packages using rhtslib need to link with zlib.
       ("zlib" ,zlib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("r-knitr" ,r-knitr)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8556
;; bamsignals from Bioconductor: read count signals from indexed bam files.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "0p3r9z9z5sfkd0b951cgr751k4z0yviyn1jfw9d4fcnyld7g1jxv"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package efficiently obtains count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8586
;; RCAS from Bioconductor: the R library behind the RNA-centric annotation
;; system.  pandoc is propagated alongside the R packages.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32 "0vdxml618vqvf8xyh0zxs307p9zby0cj9dqyiiz625ilyq1hkw2m"))))
    (properties
     `((upstream-name . "RCAS")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-gprofiler2" ,r-gprofiler2)
       ("r-iranges" ,r-iranges)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-ranger" ,r-ranger)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("pandoc" ,pandoc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8635
;; Web interface for RCAS, built with gnu-build-system from a release
;; tarball and wrapped so that it finds its Guile and R libraries.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                           "releases/download/v" version
                           "/rcas-web-" version ".tar.gz"))
       (sha256
        (base32
         "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'find-RCAS
           ;; The configure script can't find non-1.3.x versions of RCAS because
           ;; its R expression ‘1.10.1 >= 1.3.4’ evaluates to false.
           (lambda _
             (substitute* "configure"
               (("1\\.3\\.4") "0.0.0"))
             #t))
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (json  (assoc-ref inputs "guile-json"))
                    (redis (assoc-ref inputs "guile-redis"))
                    ;; Source modules (.scm) of the Guile libraries.
                    (load-path
                     (string-append
                      json  "/share/guile/site/2.2:"
                      redis "/share/guile/site/2.2"))
                    ;; Compiled modules (.go) live in a separate directory;
                    ;; pointing GUILE_LOAD_COMPILED_PATH at the source
                    ;; directories would leave them unused.
                    (compiled-path
                     (string-append
                      json  "/lib/guile/2.2/site-ccache:"
                      redis "/lib/guile/2.2/site-ccache")))
               (wrap-program (string-append out "/bin/rcas-web")
                 `("GUILE_LOAD_PATH" ":" = (,load-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,compiled-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile" ,guile-2.2)
       ("guile-json" ,guile-json-1)
       ("guile-redis" ,guile2.2-redis)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8686
;; MutationalPatterns from Bioconductor: extraction and visualization of
;; mutational patterns.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "3.0.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "1988kjjgq8af0hj7chhpxi88717wwmzs9qgrwapjh0hm2hjwhn35"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ;; The next two BSgenome data packages are only suggested packages.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-dplyr" ,r-dplyr)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggalluvial" ,r-ggalluvial)
       ("r-ggdendro" ,r-ggdendro)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-magrittr" ,r-magrittr)
       ("r-nmf" ,r-nmf)
       ("r-pracma" ,r-pracma)
       ("r-purrr" ,r-purrr)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description
     "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8731
(define-public r-chipkernels
  ;; Built from a pinned upstream commit.  The version is the base "1.1"
  ;; followed by REVISION and the first nine characters of COMMIT.
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels")
               (commit commit)))
         ;; Name the checkout with the same helper the other git-fetch
         ;; origins in this file use, instead of a hand-built string.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8767
(define-public r-seqgl
  ;; Fetched from the upstream git repository; the commit reference is the
  ;; version string itself.
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ManuSetty/SeqGL")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     (list (list "r-biostrings" r-biostrings)
           (list "r-chipkernels" r-chipkernels)
           (list "r-genomicranges" r-genomicranges)
           (list "r-spams" r-spams)
           (list "r-wgcna" r-wgcna)
           (list "r-fastcluster" r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8798
(define-public r-tximport
  ;; Bioconductor package "tximport"; r-knitr is only a build-time
  ;; (native) input.
  (package
    (name "r-tximport")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "tximport" version))
              (sha256
               (base32
                "1nxnlvl4iv2392xa72j0lzy2xnb3vrvyhfrdj9l54znwkrryyq34"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    ;; "differential expression" is the intended term here; the previous
    ;; wording "different expression" was a typo.
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for differential expression
of gene-level counts.")
    (license license:gpl2+)))
8822
(define-public r-rhdf5filters
  ;; Bioconductor package "rhdf5filters".  zlib is a plain input, while
  ;; r-rhdf5lib is propagated to users of this package.
  (package
    (name "r-rhdf5filters")
    (version "1.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5filters" version))
              (sha256
               (base32
                "1jvnss44liapbc6hk93yg1gknv0ahd5x86dydqiwq9l65jd03psq"))))
    (properties
     (list (cons 'upstream-name "rhdf5filters")))
    (build-system r-build-system)
    (native-inputs
     (list (list "r-knitr" r-knitr)))
    (inputs
     (list (list "zlib" zlib)))
    (propagated-inputs
     (list (list "r-rhdf5lib" r-rhdf5lib)))
    (home-page "https://github.com/grimbough/rhdf5filters")
    (synopsis "HDF5 compression filters")
    (description
     "This package provides a collection of compression filters for use with
HDF5 datasets.")
    (license license:bsd-2)))
8848
(define-public r-rhdf5
  ;; Bioconductor package "rhdf5".  A custom phase edits src/Makevars
  ;; right after unpacking; see the comment inside the phase.
  (package
    (name "r-rhdf5")
    (version "2.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5" version))
              (sha256
               (base32
                "0almr1vscrgj5g4dyrags131wia2pmdbdidlpskbgm44ha6hpmqi"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-linking
           (lambda _
             (substitute* "src/Makevars"
               ;; This is to avoid having a plain directory on the list of
               ;; libraries to link: append the archive name to every
               ;; "(RHDF5_LIBS)" occurrence.
               (("\\(RHDF5_LIBS\\)" match)
                (string-append match "/libhdf5.a")))
             #t)))))
    (propagated-inputs
     `(("r-rhdf5filters" ,r-rhdf5filters)
       ("r-rhdf5lib" ,r-rhdf5lib)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    ;; Grammar fix: "other software packages" (plural).
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
packages, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8887
(define-public r-annotationfilter
  ;; Bioconductor package "AnnotationFilter"; the upstream-name property
  ;; records the mixed-case upstream spelling.
  (package
    (name "r-annotationfilter")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32
         "0npk0laa2rc93rsh6yikj886zf2fl53a050j07fj9w67j0q0h3s9"))))
    (properties
     (list (cons 'upstream-name "AnnotationFilter")))
    (build-system r-build-system)
    (native-inputs
     (list (list "r-knitr" r-knitr)))
    (propagated-inputs
     (list (list "r-genomicranges" r-genomicranges)
           (list "r-lazyeval" r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8913
(define-public emboss
  ;; EMBOSS release tarball built with the GNU build system.  The custom
  ;; phases edit configure.in and Makefile.am, so the build files are
  ;; regenerated with autoreconf in the final 'autogen phase.
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version)
                           ".0/EMBOSS-" version ".tar.gz"))
       (sha256
        (base32
         "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The PNGDRIVER tests check for the presence of libgd, libpng and
         ;; zlib, but assume that they are all found at the same prefix.
         ;; Replace the check with unconditional settings.
         (add-after 'unpack 'fix-checks
           (lambda _
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         ;; At build time there is no connection to the Internet, so looking
         ;; for updates will not work.
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         (add-after 'disable-update-check 'autogen
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))
       #:configure-flags
       (let ((libharu (assoc-ref %build-inputs "libharu")))
         (list (string-append "--with-hpdf=" libharu)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8976
(define-public bits
  ;; Built from a pinned git commit with a plain Makefile: there is no
  ;; configure step and the install phase is replaced by a directory copy.
  (let ((commit "3cc4567896d9d6442923da944beb704750a08d2d")
        (revision "1"))
    (package
      (name "bits")
      ;; The version is 2.13.0 even though no release archives have been
      ;; published as yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits")
               (commit commit)))
         ;; Yields "<name>-<version>-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32
           "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Drop every Makefile line mentioning a "_cuda" target and the
           ;; line continuation left after "bits_test_intersections".
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" bindir))
               #t)))
         #:tests? #f))                  ;no tests included
      (inputs
       (list (list "gsl" gsl)
             (list "zlib" zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
9022
(define-public piranha
  ;; There is no release tarball for the latest version.  The latest commit is
  ;; older than one year at the time of this writing.
  (let ((commit "0466d364b71117d01e4471b74c514436cc281233")
        (revision "1"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:configure-flags
         (let ((bamtools (assoc-ref %build-inputs "bamtools")))
           (list (string-append "--with-bam_tools_headers="
                                bamtools "/include/bamtools")
                 (string-append "--with-bam_tools_library="
                                bamtools "/lib/bamtools")))
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the "smithlab-cpp" input into
           ;; ./src/smithlab_cpp/ of the unpacked source tree.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((sources (find-files (assoc-ref inputs "smithlab-cpp"))))
                 (for-each (lambda (file)
                             (install-file file "./src/smithlab_cpp/"))
                           sources))
               #t))
           ;; After the regular install phase, copy everything found under
           ;; the local "bin" directory into the output's bin directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (file)
                             (install-file file bin))
                           (find-files "bin" ".*")))
               #t)))))
      (native-inputs
       (list (list "python" python-2)))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Source-only input, pinned to a commit of its own.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
9089
(define-public pepr
  ;; PyPI package "PePr", built with the Python build system against
  ;; Python 2 and with the test phase disabled.
  (package
    (name "pepr")
    (version "1.0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "PePr" version))
       (sha256
        (base32
         "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:python ,python-2))             ; python2 only
    (propagated-inputs
     (list (list "python2-numpy" python2-numpy)
           (list "python2-scipy" python2-scipy)
           (list "python2-pysam" python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
9117
(define-public filevercmp
  ;; Built from a pinned git commit.  There is no configure script and no
  ;; test suite; the install phase copies the binary and the header by hand.
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7")
        (revision "1"))
    (package
      (name "filevercmp")
      ;; Same "0-1.<first 7 characters of commit>" string as before, now
      ;; produced by the standard helper with an explicit revision.
      (version (git-version "0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/filevercmp")
                      (commit commit)))
                ;; Name the checkout after the package version, as every
                ;; other git-fetch origin in this file does, rather than
                ;; after the full 40-character commit hash.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (install-file "filevercmp" (string-append out "/bin"))
                 (install-file "filevercmp.h" (string-append out "/include"))
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
9149
(define-public multiqc
  ;; PyPI package "multiqc".  The only build customization relaxes the
  ;; matplotlib version requirement declared in setup.py.
  (package
    (name "multiqc")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (sha256
        (base32
         "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; MultiQC 1.5 ‘requires’ a version of python-matplotlib older
               ;; than the one in Guix, but should work fine with 2.2.2.
               ;; See <https://github.com/ewels/MultiQC/issues/725> and
               ;; <https://github.com/ewels/MultiQC/issues/732> for details.
               ;;
               ;; POSIX extended regexps have no lazy quantifier, so match
               ;; "anything but a quote" up to the closing quote instead of
               ;; relying on ".*?".
               (("['\"]matplotlib[^'\"]*['\"]")
                "'matplotlib'"))
             #t)))))
    (home-page "https://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9195
(define-public variant-tools
  ;; Fetched from git at an explicit commit and built with the Python build
  ;; system.
  (package
    (name "variant-tools")
    (version "3.1.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/vatlab/varianttools")
                    ;; There is no tag corresponding to version 3.1.2
                    (commit "813ae4a90d25b69abc8a40f4f70441fe09015249")))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "12ibdmksj7icyqhks4xyvd61bygk4pjmxn618kp6vgk1af01y34g"))))
    (build-system python-build-system)
    (propagated-inputs
     (list (list "python-numpy" python-numpy)
           (list "python-pycurl" python-pycurl)
           (list "python-pyzmq" python-pyzmq)
           (list "python-scipy" python-scipy)
           (list "python-tables" python-tables)))
    (inputs
     (list (list "boost" boost)
           (list "c-blosc" c-blosc)
           (list "gsl" gsl)
           (list "hdf5" hdf5)
           (list "hdf5-blosc" hdf5-blosc)
           (list "python-cython" python-cython)
           (list "zlib" zlib)))
    (home-page "https://vatlab.github.io/vat-docs/")
    (synopsis "Analyze genetic variants from Next-Gen sequencing studies")
    (description
     "Variant tools is a tool for the manipulation, annotation,
selection, simulation, and analysis of variants in the context of next-gen
sequencing analysis. Unlike some other tools used for next-gen sequencing
analysis, variant tools is project based and provides a whole set of tools to
manipulate and analyze genetic variants.")
    (license license:gpl3+)))
9235
(define-public r-chipseq
  ;; Bioconductor package "chipseq"; all of its inputs are propagated.
  (package
    (name "r-chipseq")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "chipseq" version))
              (sha256
               (base32
                "12pzq24aarvgxfmhcad0l5g951xqdvvi7bspgbsvlvmfkqd74j2v"))))
    (build-system r-build-system)
    (propagated-inputs
     (list (list "r-biocgenerics" r-biocgenerics)
           (list "r-genomicranges" r-genomicranges)
           (list "r-iranges" r-iranges)
           (list "r-lattice" r-lattice)
           (list "r-s4vectors" r-s4vectors)
           (list "r-shortread" r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9261
(define-public r-copyhelper
  ;; Bioconductor *experiment data* package "CopyhelpeR".
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       ;; Let bioconductor-uri compute the experiment-data download
       ;; location, like the other Bioconductor packages in this file,
       ;; instead of hard-coding the ".../release/..." URL by hand.
       (uri (bioconductor-uri "CopyhelpeR" version 'experiment))
       (sha256
        (base32
         "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9287
(define-public r-copywriter
  ;; Bioconductor package "CopywriteR"; the upstream-name property records
  ;; the mixed-case upstream spelling.
  (package
    (name "r-copywriter")
    (version "2.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CopywriteR" version))
              (sha256
               (base32
                "060p6l6l8i6b15hyyz5v5kkxih3h4wcciixii51m9mn82z23xr2f"))))
    (properties
     (list (cons 'upstream-name "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     (list (list "r-biocparallel" r-biocparallel)
           (list "r-chipseq" r-chipseq)
           (list "r-copyhelper" r-copyhelper)
           (list "r-data-table" r-data-table)
           (list "r-dnacopy" r-dnacopy)
           (list "r-futile-logger" r-futile-logger)
           (list "r-genomeinfodb" r-genomeinfodb)
           (list "r-genomicalignments" r-genomicalignments)
           (list "r-genomicranges" r-genomicranges)
           (list "r-gtools" r-gtools)
           (list "r-iranges" r-iranges)
           (list "r-matrixstats" r-matrixstats)
           (list "r-rsamtools" r-rsamtools)
           (list "r-s4vectors" r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9327
(define-public r-methylkit
  ;; Bioconductor package "methylKit"; r-knitr is needed at build time
  ;; only, for the vignettes.
  (package
    (name "r-methylkit")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32
         "1c9b11gfh3cc37iwym9rgsba3mh2xkp78a1gvnjqhzlkiz667mn3"))))
    (properties
     (list (cons 'upstream-name "methylKit")))
    (build-system r-build-system)
    (native-inputs
     (list (list "r-knitr" r-knitr)))   ; for vignettes
    (propagated-inputs
     (list (list "r-data-table" r-data-table)
           (list "r-emdbook" r-emdbook)
           (list "r-fastseg" r-fastseg)
           (list "r-genomeinfodb" r-genomeinfodb)
           (list "r-genomicranges" r-genomicranges)
           (list "r-gtools" r-gtools)
           (list "r-iranges" r-iranges)
           (list "r-kernsmooth" r-kernsmooth)
           (list "r-limma" r-limma)
           (list "r-mclust" r-mclust)
           (list "r-mgcv" r-mgcv)
           (list "r-qvalue" r-qvalue)
           (list "r-r-utils" r-r-utils)
           (list "r-rcpp" r-rcpp)
           (list "r-rhtslib" r-rhtslib)
           (list "r-rsamtools" r-rsamtools)
           (list "r-rtracklayer" r-rtracklayer)
           (list "r-s4vectors" r-s4vectors)
           (list "r-zlibbioc" r-zlibbioc)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9374
(define-public r-sva
  ;; Bioconductor package "sva"; all of its inputs are propagated.
  (package
    (name "r-sva")
    (version "3.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "sva" version))
       (sha256
        (base32
         "1hpzzg3qrgkd8kwg1m5gq94cikjgk9j4l1wk58fxl49s6fmd13zy"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-edger" ,r-edger)
       ("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    ;; Grammar fix: "high-throughput experiments" (plural).
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiments. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9405
(define-public r-raremetals2
  ;; Tarball downloaded from the genome.sph.umich.edu wiki; the upstream-name
  ;; property records the mixed-case upstream spelling.
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://genome.sph.umich.edu/w/images/b/b7/"
                                  "RareMETALS2_" version ".tar.gz"))
              (sha256
               (base32
                "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties
     (list (cons 'upstream-name "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     (list (list "r-seqminer" r-seqminer)
           (list "r-mvtnorm" r-mvtnorm)
           (list "r-mass" r-mass)
           (list "r-compquadform" r-compquadform)
           (list "r-getopt" r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9437
(define-public r-protgenerics
  ;; Bioconductor package "ProtGenerics"; it declares no inputs.
  (package
    (name "r-protgenerics")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ProtGenerics" version))
              (sha256
               (base32
                "0yihxphgkshvfv1sn67wc4zvr2zlzws2j7ki3zabm6vyfkfdkfiz"))))
    (properties
     (list (cons 'upstream-name "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9457
(define-public r-mzr
  ;; Bioconductor package "mzR".  The bundled boost sources are deleted by
  ;; the origin snippet and src/Makevars is patched to link against the
  ;; boost input instead.
  (package
    (name "r-mzr")
    (version "2.24.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzR" version))
       (sha256
        (base32
         "0ik0yrjhvk8r5pm990chn2aadp0gqzzkkm0027682ky34xp142sg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "src/boost")
           #t))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             (substitute* "src/Makevars"
               ;; Drop references to the (deleted) bundled boost sources.
               (("\\./boost/libs.*") "")
               ;; This is to avoid having a plain directory on the list of
               ;; libraries to link.
               (("\\(RHDF5_LIBS\\)" match)
                (string-append match "/libhdf5.a"))
               (("PKG_LIBS=") "PKG_LIBS=$(BOOST_LIBS) ")
               ;; Define BOOST_LIBS on the line after "ARCH_OBJS=".  The
               ;; pattern is a plain literal; the former "\\A" escape is not
               ;; a POSIX regexp operator and only matched a literal "A".
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nBOOST_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(;; Our default boost package won't work here, unfortunately, even with
       ;; mzR version 2.24.1.
       ("boost" ,boost-for-mysql) ; use this instead of the bundled boost sources
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-ncdf4" ,r-ncdf4)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9518
(define-public r-affyio
  ;; Bioconductor package "affyio"; zlib is a plain input and r-zlibbioc is
  ;; propagated.
  (package
    (name "r-affyio")
    (version "1.60.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affyio" version))
              (sha256
               (base32
                "14xnzrxrvgxgixjhq5a9fdgcmrxam2j74hwidkc9if92ffv6s83h"))))
    (build-system r-build-system)
    (inputs
     (list (list "zlib" zlib)))
    (propagated-inputs
     (list (list "r-zlibbioc" r-zlibbioc)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9542
(define-public r-affy
  ;; Bioconductor package "affy"; zlib is a plain input, the R packages
  ;; are propagated.
  (package
    (name "r-affy")
    (version "1.68.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "affy" version))
              (sha256
               (base32
                "0ywz548cbzk2k1njnxhlk5ydzvz2dk78ka8kx53gwrmdc4sc2b06"))))
    (build-system r-build-system)
    (inputs
     (list (list "zlib" zlib)))
    (propagated-inputs
     (list (list "r-affyio" r-affyio)
           (list "r-biobase" r-biobase)
           (list "r-biocgenerics" r-biocgenerics)
           (list "r-biocmanager" r-biocmanager)
           (list "r-preprocesscore" r-preprocesscore)
           (list "r-zlibbioc" r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9570
(define-public r-vsn
  ;; Bioconductor package "vsn"; r-knitr is needed at build time only, for
  ;; the vignettes.
  (package
    (name "r-vsn")
    (version "3.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "vsn" version))
              (sha256
               (base32
                "0dfrfflidpnphwyqzmmfiz9blfqv6qa09xlwgfabhpfsf3ml2rlb"))))
    (build-system r-build-system)
    (native-inputs
     (list (list "r-knitr" r-knitr)))   ; for vignettes
    (propagated-inputs
     (list (list "r-affy" r-affy)
           (list "r-biobase" r-biobase)
           (list "r-ggplot2" r-ggplot2)
           (list "r-lattice" r-lattice)
           (list "r-limma" r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9606
;; Bioconductor's "mzID" parser.  The upstream name differs in case from the
;; Guix package name, hence the upstream-name property.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzID" version))
       (sha256
        (base32 "0s7d6cz1li7v3ni6n6hrdspl93yiyr283kmbbd3hhkfgzgx6kpq2"))))
    (properties '((upstream-name . "mzID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-xml" ,r-xml)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9637
;; Bioconductor's "pcaMethods" collection of PCA implementations, built with
;; r-build-system.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.82.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "pcaMethods" version))
       (sha256
        (base32 "04xb4vjky6hq58l30i1iq9rv5gzjdxnidjxpnzg7pvg67vz8pgf0"))))
    (properties '((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9667
;; Bioconductor's "MSnbase" (base classes and functions for MS-based
;; proteomics), built with r-build-system.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnbase" version))
       (sha256
        (base32 "0hxzs9zzljywqxr7q388hshpy1pdryhl0zkwffqbxpf5pcf92d3h"))))
    (properties '((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9712
;; Bioconductor's "MSnID" (utilities for LC-MSn proteomics identifications),
;; built with r-build-system.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32 "05bncy7lw2a3h8xgnavjiz56pc6mk8q7l6qdd81197nawxs3j02d"))))
    (properties '((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocstyle" ,r-biocstyle)
       ("r-biostrings" ,r-biostrings)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-msmstests" ,r-msmstests)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-purrr" ,r-purrr)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-runit" ,r-runit)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-xtable" ,r-xtable)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    ;; The verb "optimizes" agrees with the singular subject "it".
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9764
;; Bioconductor's "aroma.light".  The upstream name contains a dot, which the
;; Guix package name replaces with a hyphen; upstream-name records the
;; original spelling.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "aroma.light" version))
       (sha256
        (base32 "0pi37rlfqh24p9wd7l1xb3f7c7938xdscgcc5agp8c9qhajq25a0"))))
    (properties '((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9791
;; Bioconductor's "DESeq" (differential gene expression analysis), built with
;; r-build-system.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.39.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq" version))
       (sha256
        (base32 "047hph5aqmjnz1aqprziw0smdn5lf96hmwpnvqrxv1j2yfvcf3h1"))))
    (properties '((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www-huber.embl.de/users/anders/DESeq/")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9822
;; Bioconductor's "EDASeq" (exploratory data analysis and normalization for
;; RNA-Seq), built with r-build-system.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "EDASeq" version))
       (sha256
        (base32 "0fznj7lsgkss1svv4rq8g87s1gmnbd7hccim41dv1c2w2nl0n2ip"))))
    (properties '((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9862
;; Bioconductor's "interactiveDisplayBase" (base package for Shiny-based
;; displays of Bioconductor objects), built with r-build-system.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "interactiveDisplayBase" version))
       (sha256
        (base32 "08id2hkx4ssxj34dildx00a4j3z0nv171b7b0wl6xjks7wk6lv01"))))
    (properties '((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-dt" ,r-dt)
       ("r-shiny" ,r-shiny)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9889
;; Bioconductor's "AnnotationHub" client, built with r-build-system.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationHub" version))
       (sha256
        (base32 "1950x654ffqx53b154kbph808zdh2xm5vmj9vzmc5nxc28fi2z5g"))))
    (properties '((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocversion" ,r-biocversion)
       ("r-curl" ,r-curl)
       ("r-dplyr" ,r-dplyr)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rappdirs" ,r-rappdirs)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9930
;; Bioconductor's "fastseg" segmentation package, built with r-build-system.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "fastseg" version))
       (sha256
        (base32 "1ln6w93ag4wanp0nrm0pqngbfc88w95zq2kcj583hbxy885dkg4f"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9960
;; Bioconductor's "KEGGREST" (client-side REST access to KEGG), built with
;; r-build-system.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32 "0k9z85xf9la2y98xqmdmjb8mci9fh2fdybkl77x1yl26hyalip0s"))))
    (properties '((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; Say "provides" only once; the earlier wording read "This package
    ;; provides a package that provides ...".
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
9986
;; Bioconductor's "gage" (gene-set enrichment for pathway analysis), built
;; with r-build-system.  The home page is the URL of the published article,
;; split in two only to keep the line short.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.40.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32 "1iawa03dy4bl333my69d4sk7d74cjzfg5dpcxga6q5dglan4sp8r"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page
     (string-append "https://bmcbioinformatics.biomedcentral.com/"
                    "articles/10.1186/1471-2105-10-161"))
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
10017
;; Bioconductor's "GenomicFiles" (distributed computing by file or by range),
;; built with r-build-system.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFiles" version))
       (sha256
        (base32 "0awnf0m1pz7cw9wvh9cfxz9k7xm6wnvjm7xbxf139lrhd4nlyqjz"))))
    (properties '((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixgenerics" ,r-matrixgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
10051
;; Bioconductor's "ComplexHeatmap", built with r-build-system.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ComplexHeatmap" version))
       (sha256
        (base32 "1nx1xxpq8zrvi990v9fmvx3msl85pdz5dp1gp6m78q6i4s2alg5x"))))
    (properties '((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-cairo" ,r-cairo)
       ("r-circlize" ,r-circlize)
       ("r-clue" ,r-clue)
       ("r-colorspace" ,r-colorspace)
       ("r-digest" ,r-digest)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-png" ,r-png)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10090
;; Bioconductor's "DirichletMultinomial", built with r-build-system.  gsl is
;; the only non-R input.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DirichletMultinomial" version))
       (sha256
        (base32 "098zql6ryd1b0gkq4cjybblyh0x8xidxxfygqq5a5x9asl8y4vsk"))))
    (properties '((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs
     `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10119
;; Bioconductor's "ensembldb" (Ensembl-based annotation databases), built with
;; r-build-system.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32 "04il99gcrqzakvc0bxchdp9gghkn1sp9lpiian0iz4y7r67z3wpy"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; Grammar corrected: plural subjects take "are", and "retrieve" becomes
    ;; a gerund after "in addition to" and "allows".
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotations for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data are
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
that allows retrieving annotations for specific entries like genes encoded on a
chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
10164
;; Bioconductor's "OrganismDbi" (unified interface to several annotation
;; packages), built with r-build-system.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32 "1mklnzs0d0ygcdibwfnk5xqr8ln6wpa00qcaw9c68m342kql0jqw"))))
    (properties '((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; "a select method": singular noun to match the article.
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10196
;; Bioconductor's "biovizBase" (basic graphic utilities for genomic data),
;; built with r-build-system.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biovizBase" version))
       (sha256
        (base32 "10jflvadfcgxq2jnfxkpn417xd8ibh3zllz9rsqnq5w3wgfr4fhq"))))
    (properties '((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10238
;; "dropbead" R package, built from a pinned commit of its git repository
;; with r-build-system.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      ;; git-version builds "<version>-<revision>.<first 7 chars of commit>",
      ;; the same string that was previously assembled by hand here.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
10268
;; "CellChat" R package, built from a pinned commit of its git repository
;; with r-build-system.
(define-public r-cellchat
  (let ((commit "21edd226ca408e4c413408f98562d71ee0b54e5d")
        (revision "1"))
    (package
      (name "r-cellchat")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/sqjin/CellChat")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cvzl9mi8jjznpql2gv67swnk1dndn3a2h22z5l84h7lwpwjmh53"))
         ;; Delete the shared library and object files that the checkout
         ;; carries in src/.
         (snippet
          '(for-each delete-file
                     '("src/CellChat.so"
                       "src/CellChat_Rcpp.o"
                       "src/RcppExports.o")))))
      (properties '((upstream-name . "CellChat")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-circlize" ,r-circlize)
         ("r-colorspace" ,r-colorspace)
         ("r-complexheatmap" ,r-complexheatmap)
         ("r-cowplot" ,r-cowplot)
         ("r-dplyr" ,r-dplyr)
         ("r-expm" ,r-expm)
         ("r-fnn" ,r-fnn)
         ("r-forcats" ,r-forcats)
         ("r-future" ,r-future)
         ("r-future-apply" ,r-future-apply)
         ("r-gg-gap" ,r-gg-gap)
         ("r-ggalluvial" ,r-ggalluvial)
         ("r-ggplot2" ,r-ggplot2)
         ("r-ggrepel" ,r-ggrepel)
         ("r-igraph" ,r-igraph)
         ("r-irlba" ,r-irlba)
         ("r-magrittr" ,r-magrittr)
         ("r-matrix" ,r-matrix)
         ("r-nmf" ,r-nmf)
         ("r-patchwork" ,r-patchwork)
         ("r-pbapply" ,r-pbapply)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-rcpp" ,r-rcpp)
         ("r-rcppeigen" ,r-rcppeigen)
         ("r-reshape2" ,r-reshape2)
         ("r-reticulate" ,r-reticulate)
         ("r-rspectra" ,r-rspectra)
         ("r-rtsne" ,r-rtsne)
         ("r-scales" ,r-scales)
         ("r-shape" ,r-shape)
         ("r-sna" ,r-sna)
         ("r-stringr" ,r-stringr)
         ("r-svglite" ,r-svglite)))
      (native-inputs
       `(("r-knitr" ,r-knitr)))
      (home-page "https://github.com/sqjin/CellChat")
      (synopsis "Analysis of cell-cell communication from single-cell transcriptomics data")
      (description
       "This package infers, visualizes and analyzes the cell-cell
communication networks from scRNA-seq data.")
      (license license:gpl3))))
10334
;; Sambamba, built from the upstream git tag "v<version>" with
;; gnu-build-system.  There is no configure step; the Makefile is patched to
;; use ldc2 and to link against lz4 via "-L-llz4" rather than the bundled
;; static archive, and the versioned binary is installed as bin/sambamba.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.8.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biod/sambamba")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "07dznzl6m8k7sw84jxw2kx6i3ymrapbmcmyh0fxz8wrybhw8fmwc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             (substitute* "Makefile"
               ;; We use ldc2 instead of ldmd2 to compile sambamba.
               (("\\$\\(shell which ldmd2\\)") (which "ldc2")))
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Drop the references to the bundled lz4 static library and
             ;; to the Makefile target that builds it.
             (substitute* "Makefile"
               (("= lz4/lib/liblz4.a") "= -L-llz4")
               (("ldc_version_info lz4-static") "ldc_version_info"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               ;; The build produces "bin/sambamba-<version>"; install it
               ;; under the unversioned name.
               (copy-file (string-append "bin/sambamba-" ,version)
                          (string-append bindir "/sambamba"))
               #t))))))
    (native-inputs
     `(("python" ,python)))
    (inputs
     `(("ldc" ,ldc)
       ("lz4" ,lz4)
       ("zlib" ,zlib)))
    (home-page "https://github.com/biod/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
10389
;; Ritornello, built from the upstream git tag "v<version>" with
;; gnu-build-system.  There is no configure step and no test suite; the
;; install phase copies the single binary into bin/.
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/Ritornello")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           ;; No phase argument is needed here: only the include directive
           ;; is rewritten.
           (lambda _
             (substitute* '("src/SamStream.h"
                            "src/FLD.cpp")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               ;; install-file creates the target directory itself, so no
               ;; separate mkdir-p is required.
               (install-file "bin/Ritornello" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
10434
;; Trim Galore, a single script taken from the upstream git tag.  Nothing is
;; compiled: the phases only patch the script so that it refers to the store
;; paths of cutadapt, gzip, gunzip and pigz, and then install it into bin/.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.6.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Trim Galore tries to figure out what version of Python
             ;; cutadapt is using by looking at the shebang.  Of course that
             ;; doesn't work, because cutadapt is wrapped in a shell script.
             (substitute* "trim_galore"
               (("my \\$python_return.*")
                "my $python_return = \"Python 3.999\";\n"))
             #t))
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((cutadapt (assoc-ref inputs "cutadapt"))
                   (gzip (assoc-ref inputs "gzip"))
                   (pigz (assoc-ref inputs "pigz")))
               ;; Replace bare tool names in the script with absolute file
               ;; names below the corresponding inputs.
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '"
                                 cutadapt "/bin/cutadapt'"))
                 (("\\$compression_path = \"gzip\"")
                  (string-append "$compression_path = \""
                                 gzip "/bin/gzip\""))
                 (("\"gunzip")
                  (string-append "\"" gzip "/bin/gunzip"))
                 (("\"pigz")
                  (string-append "\"" pigz "/bin/pigz"))))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               ;; install-file creates the target directory itself, so no
               ;; separate mkdir-p is required.
               (install-file "trim_galore" bin)
               #t))))))
    ;; The source is a git checkout rather than a zip archive, so no
    ;; archive extraction tool is needed as a native input.
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("pigz" ,pigz)
       ("cutadapt" ,cutadapt)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
10504
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         ;; Nothing is configured or compiled; the sources are copied as
         ;; they are in the install phase.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    ;; Name the site-packages directory after python-2,
                    ;; the interpreter listed in the inputs, rather than
                    ;; after the unrelated `python' package variable.
                    (target (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-script (string-append target "GESS.py")
                 `("PYTHONPATH" ":" = (,target ,(getenv "PYTHONPATH"))))
               ;; Expose the wrapped script under bin/.
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)
       ("guile" ,guile-3.0)))           ; for the script wrapper
    (home-page "https://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information.  GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
10569
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       ;; Build with the Unix makefile and run its "install" target.
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; The makefile is used from within the "src" directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every file found under "../exe" into the output's
             ;; bin directory.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (for-each (lambda (program)
                           (install-file program bin))
                         (find-files "../exe" ".*"))
               #t))))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
10606
(define-public imp
  (package
    (name "imp")
    (version "2.13.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "1z1vcpwbylixk0zywngg5iw0jv083jj1bqphi817jpg3fb9fx2jj"))))
    (build-system cmake-build-system)
    (arguments
     `(;; CMake 3.17 or newer is required for the CMAKE_CTEST_ARGUMENTS used
       ;; below to have an effect.
       #:cmake ,cmake
       #:configure-flags
       (let* ((skipped-tests
               '("expensive"                ;exclude expensive tests
                 "IMP.modeller"             ;fail to import its own modules
                 "IMP.parallel-test_sge.py" ;fail in build container
                 ;; The following test fails non-reproducibly on
                 ;; an inexact numbers assertion.
                 "IMP.em-medium_test_local_fitting.py"))
              ;; Single-quoted alternation of all skipped test names,
              ;; passed to ctest's "-E" option.
              (exclude-regexp
               (format #f "'(~a)'" (string-join skipped-tests "|"))))
         (list
          (string-append
           "-DCMAKE_CTEST_ARGUMENTS="
           (string-join (list "-L" "-tests?-" ;select only tests
                              "-E" exclude-regexp)
                        ";"))))))
    (native-inputs
     `(("python" ,python-wrapper)
       ("swig" ,swig)))
    (inputs
     `(("boost" ,boost)
       ("cgal" ,cgal)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("eigen" ,eigen)
       ;; Enabling MPI causes the build to use all the available memory and
       ;; fail (tested on a machine with 32 GiB of RAM).
       ;;("mpi" ,openmpi)
       ("opencv" ,opencv)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-networkx" ,python-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments.  IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license.  Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
10672
(define-public tadbit
  (package
    (name "tadbit")
    (version "1.0.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/3DGenomes/TADbit")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0hqrlymh2a2bimcfdvlssy1x5h1lp3h1c5a7jj11hmcqczzqn3ni"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Don't attempt to install the bash completions to
             ;; the home directory.  The completion file is renamed and
             ;; setup.py is pointed at a directory inside the output.
             (let ((completion-dir
                    (string-append (assoc-ref outputs "out")
                                   "/etc/bash_completion.d")))
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completion-dir "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t)))
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Run the test script against the installed modules.
             (add-installed-pythonpath inputs outputs)
             (invoke "python3" "test/test_all.py")
             #t)))))
    (native-inputs
     `(("glib" ,glib "bin")             ;for gtester
       ("pkg-config" ,pkg-config)))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python-future" ,python-future)
       ("python-h5py" ,python-h5py)
       ("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data.  With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models.  TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
10733
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let ((keep '("." ".."
                         "utils" "lib" "inc" "tagStorm"
                         "parasol" "htslib")))
             ;; Visit every entry of "src" that is not in KEEP and delete
             ;; it when it is a directory; plain files are left alone.
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           (when (eq? 'directory (stat:type (stat path)))
                             (delete-file-recursively path))))
                       (scandir "src"
                                (lambda (entry)
                                  (not (member entry keep))))))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         (add-after 'unpack 'fix-paths
           (lambda _
             ;; Use the "echo" found on PATH instead of /bin/echo.
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Place a copy of the samtabix sources in the build tree.
             (copy-recursively (assoc-ref inputs "samtabix")
                               "samtabix")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Everything in the local "bin" directory goes to the
             ;; output's bin directory.
             (copy-recursively "bin"
                               (string-append (assoc-ref outputs "out")
                                              "/bin"))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(let ((commit "10fd107909c1ac4d679299908be4262a012965ba"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                   (commit commit)))
             (file-name (git-file-name "samtabix" (string-take commit 7)))
             (sha256
              (base32
               "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma")))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb-dev" ,mariadb "dev")
       ("openssl" ,openssl)))
    (home-page "https://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
10851
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; `git-version' builds the same "1.1-REVISION.SHORT-COMMIT" string
      ;; that used to be assembled by hand here.
      (version (git-version "1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq")
                      (commit commit)))
                ;; Name the checkout like the other git-fetch origins in
                ;; this file do.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((target (assoc-ref outputs "out"))
                      (bin (string-append target "/bin"))
                      (doc (string-append target "/share/doc/f-seq"))
                      (lib (string-append target "/lib")))
                 (mkdir-p target)
                 (mkdir-p doc)
                 ;; Make the launcher script refer to the "java" found on
                 ;; PATH, to the commons-cli jar from the inputs, and to
                 ;; the output's bin directory as REALDIR.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    (string-append (assoc-ref inputs "java-commons-cli")
                                   "/share/java/commons-cli.jar"))
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bin "\n")))
                 (install-file "README.txt" doc)
                 (install-file "bin/linux/fseq" bin)
                 (install-file "build~/fseq.jar" lib)
                 (copy-recursively "lib" lib)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "https://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq).  Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
10910
(define-public bismark
  (package
    (name "bismark")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:modules ((guix build utils)
                  (ice-9 popen)
                  (srfi srfi-26)
                  (guix build perl-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled plotly.js is minified.  Overwrite it with the
         ;; output of running uglify-js on the "plotly.js" input.
         (add-after 'unpack 'replace-plotly.js
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((plotly-source (assoc-ref inputs "plotly.js"))
                    (destination "plotly/plotly.js")
                    (pipe (open-pipe* OPEN_READ "uglify-js" plotly-source)))
               (call-with-output-file destination
                 (cut dump-port pipe <>)))
             #t))
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/bismark"))
                    (plotly-dir (string-append share "/plotly"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (docs '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary"
                               "NOMe_filtering")))
               ;; Point the report generator at the installed location of
               ;; the plotly directory.
               (substitute* "bismark2report"
                 (("\\$RealBin/plotly") plotly-dir))
               (for-each mkdir-p (list share docdir bin))
               (for-each (lambda (script) (install-file script bin))
                         scripts)
               (for-each (lambda (document) (install-file document docdir))
                         docs)
               (copy-recursively "Docs/Images" (string-append docdir "/Images"))
               (copy-recursively "plotly" plotly-dir)

               ;; Fix references to gunzip
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl-carp" ,perl-carp)
       ("perl-getopt-long" ,perl-getopt-long)))
    (native-inputs
     `(("plotly.js"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
                               "v1.39.4/dist/plotly.js"))
           (sha256
            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
       ("uglify-js" ,uglify-js)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away.  Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
  and CHH context
@end itemize\n")
    (license license:gpl3+)))
11015
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  ;; Some files in the original tarball have restrictive
                  ;; permissions, which makes repackaging fail
                  (for-each (lambda (file) (chmod file #o644)) (find-files "."))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Use the "bash" found on PATH instead of /bin/bash, then
             ;; enter the directory the remaining phases work in.
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install the listed programs and the documentation that
             ;; lives next to the "src" directory.
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin    (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11066
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.46.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pachterlab/kallisto")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0ij5n7v3m90jdfi7sn8nvglfyf58abp1f5xq42r4k73l0lfds6xi"))
              (modules '((guix build utils)))
              ;; Drop the bundled copy of htslib.
              (snippet
               '(delete-file-recursively "ext/htslib/"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Wrap the ExternalProject_Add block in a condition that is
             ;; never true and comment out the include directory that
             ;; refers to htslib_PREFIX.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" m)
                (string-append "if (NEVER)\n" m))
               (("^\\)")
                ")\nendif(NEVER)")
               (("include_directories\\(\\$\\{htslib_PREFIX.*" m)
                (string-append "# " m)))
             ;; Link against "hts" instead of the bundled static archive
             ;; and stop adding the bundled headers to the include path.
             (substitute* "src/CMakeLists.txt"
               (("target_link_libraries\\(kallisto kallisto_core pthread \
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)") ""))
             #t)))))
    (inputs
     `(("hdf5" ,hdf5)
       ("htslib" ,htslib-1.9)
       ("zlib" ,zlib)))
    (home-page "https://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads.  It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment.  Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11118
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kingsford-Group/libgff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6vfjnq7a2mianipscbshrvbncss8z4zkgkbjw754p9043nfkps"))))
    (build-system cmake-build-system)
    ;; No tests included.
    (arguments
     '(#:tests? #f))
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase.  The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    ;; The license is recorded by the URL of its text.
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
11141
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/kingsfordgroup/sailfish")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       ;; Tell the build where Boost lives and which libraries to link,
       ;; since the find_package call for it is disabled below.
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR=" boost "/include/")
               (string-append "-DBOOST_LIBRARYDIR=" boost "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              "-lboost_iostreams "
                              "-lboost_filesystem "
                              "-lboost_system "
                              "-lboost_thread "
                              "-lboost_timer "
                              "-lboost_chrono "
                              "-lboost_program_options")
               "-DBoost_FOUND=TRUE"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Extract the rapmap input with tar into a scratch
             ;; directory, then copy its C/C++ sources and its headers to
             ;; the places under "external/install".
             (let ((scratch "/tmp/rapmap")
                   (src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/"))
               (mkdir-p scratch)
               (invoke "tar" "xf"
                       (assoc-ref inputs "rapmap")
                       "-C" scratch
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files (string-append scratch "/src")
                                     "\\.(c|cpp)"))
               (copy-recursively (string-append scratch "/include")
                                 include)
               #t)))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jellyfish-dir (assoc-ref inputs "jellyfish"))
                   (divsufsort-dir (assoc-ref inputs "libdivsufsort")))
               ;; Drop the includes of jellyfish's config.h.
               (substitute* '("src/SailfishIndexer.cpp"
                              "src/SailfishUtils.cpp"
                              "src/SailfishQuantify.cpp"
                              "src/FASTAParser.cpp"
                              "include/PCA.hpp"
                              "include/SailfishUtils.hpp"
                              "include/SailfishIndex.hpp"
                              "include/CollapsedEMOptimizer.hpp"
                              "src/CollapsedEMOptimizer.cpp")
                 (("#include \"jellyfish/config.h\"") ""))
               ;; Refer to the jellyfish and libdivsufsort inputs instead
               ;; of the copies expected under "external/install".
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                  (string-append jellyfish-dir
                                 "/include/jellyfish-"
                                 ,(package-version jellyfish)))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                  (string-append jellyfish-dir
                                 "/lib/libjellyfish-2.0.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append divsufsort-dir
                                 "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append divsufsort-dir
                                 "/lib/libdivsufsort64.so")))
               (substitute* "CMakeLists.txt"
                 ;; Don't prefer static libs
                 (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
                 (("find_package\\(Jellyfish.*") "")
                 (("ExternalProject_Add\\(libjellyfish") "message(")
                 (("ExternalProject_Add\\(libgff") "message(")
                 (("ExternalProject_Add\\(libsparsehash") "message(")
                 (("ExternalProject_Add\\(libdivsufsort") "message("))

               ;; Ensure that Eigen headers can be found
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3:"
                                      (or (getenv "CPLUS_INCLUDE_PATH") "")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data.  It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify.  All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
11291
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    ;; Running the tests in parallel is not supported.
    (arguments
     '(#:parallel-tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    ;; Perl is needed for the tests.
    (native-inputs
     `(("perl" ,perl)))
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
11332
(define-public salmon
  (package
    (name "salmon")
    (version "0.13.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/salmon")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1i2z4aivicmiixdz9bxalp7vmfzi3k92fxa63iqa8kgvfw5a4aq5"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled headers for eigen3.
           (delete-file-recursively "include/eigen3/")
           #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       ;; Point CMake at the boost input explicitly; the find_package call
       ;; for Boost is commented out by the 'do-not-look-for-boost phase.
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR=" boost "/include/")
               (string-append "-DBOOST_LIBRARYDIR=" boost "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              "-lboost_iostreams "
                              "-lboost_filesystem "
                              "-lboost_system "
                              "-lboost_thread "
                              "-lboost_timer "
                              "-lboost_chrono "
                              "-lboost_program_options")
               "-DBoost_FOUND=TRUE"
               "-DTBB_LIBRARIES=tbb tbbmalloc"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.59\\.0") "#"))
             #t))
         ;; Replace the call to getVersionMessage() with an empty string.
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         ;; Copy the RapMap sources and headers from the "rapmap" input into
         ;; the build tree, then remove a handful of the copied files.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rapmap      (assoc-ref inputs "rapmap"))
                   (src-dir     "external/install/src/rapmap/")
                   (include-dir "external/install/include/rapmap/"))
               (for-each mkdir-p (list src-dir include-dir))
               (copy-recursively (string-append rapmap "/src") src-dir)
               (copy-recursively (string-append rapmap "/include")
                                 include-dir)
               (for-each delete-file
                         '("external/install/include/rapmap/xxhash.h"
                           "external/install/include/rapmap/FastxParser.hpp"
                           "external/install/include/rapmap/concurrentqueue.h"
                           "external/install/include/rapmap/FastxParserThreadUtils.hpp"
                           "external/install/src/rapmap/FastxParser.cpp"
                           "external/install/src/rapmap/xxhash.c"))
               (delete-file-recursively
                "external/install/include/rapmap/spdlog"))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               ;; Don't download anything
               (("DOWNLOAD_COMMAND") "DOWNLOAD_COMMAND echo")
               (("externalproject_add\\(libcereal") "message(")
               (("externalproject_add\\(libgff") "message(")
               (("externalproject_add\\(libtbb") "message(")
               (("externalproject_add\\(libdivsufsort") "message(")
               (("externalproject_add\\(libstadenio") "message(")
               (("externalproject_add_step\\(") "message("))
             ;; Swap the static archives expected under external/install for
             ;; the shared libraries provided by the inputs.
             (let ((staden     (assoc-ref inputs "libstadenio-for-salmon"))
                   (divsufsort (assoc-ref inputs "libdivsufsort")))
               (substitute* "src/CMakeLists.txt"
                 (("add_dependencies") "#")
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                  (string-append staden "/lib/libstaden-read.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append divsufsort "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append divsufsort "/lib/libdivsufsort64.so"))
                 (("lib/libdivsufsort.a") "/lib/libdivsufsort.so")))

             ;; Ensure that all headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (or (getenv "CPLUS_INCLUDE_PATH") "")
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ;; RapMap checkout for the "salmon-v<version>" tag; it is copied into
       ;; the build tree by the 'prepare-rapmap phase.
       ("rapmap"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/COMBINE-lab/RapMap")
                 (commit (string-append "salmon-v" version))))
           (file-name (string-append "rapmap-salmon-v" version "-checkout"))
           (sha256
            (base32
             "1biplxf0csc7a8h1wf219b0vmjkvw6wk2zylhdklb577kgmihdms"))))
       ("jemalloc" ,jemalloc)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
11477
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.17")
    ;; The tarball on Pypi does not include the tests.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/linnarsson-lab/loompy")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite with pytest from the source checkout.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             ;; Honour #:tests? so that the suite can be switched off the
             ;; same way as with the stock 'check phase.
             (when tests?
               ;; Put the source tree first on the module search path.
               ;; `getenv' returns #f when PYTHONPATH is unset, and #f must
               ;; not reach `string-append'.
               (let ((pythonpath (getenv "PYTHONPATH")))
                 (setenv "PYTHONPATH"
                         (if pythonpath
                             (string-append (getcwd) ":" pythonpath)
                             (getcwd))))
               (invoke "pytest" "tests"))
             #t)))))
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
11518
11519 ;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  ;; No release is referenced here; the package is pinned to a commit and the
  ;; version string is derived from REVISION and the abbreviated commit ID.
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      (version (string-append "0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled jars and pre-built classes
             (delete-file-recursively "forester/java/resources")
             (delete-file-recursively "forester/java/classes")
             (for-each delete-file (find-files "forester/java/" "\\.jar$"))
             ;; Delete bundled applications
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'chdir
             (lambda _ (chdir "forester/java") #t))
           ;; Rewrite build.xml through SXML, writing the result to a
           ;; temporary file that then replaces the original.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (let ((rules
                      `(;; Remove all unjar tags to avoid repacking classes.
                        (unjar . ,(lambda _ '()))
                        ;; Every other element and all text pass through
                        ;; unchanged.
                        (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                        (*text* . ,(lambda (_ txt) txt)))))
                 (call-with-output-file "build.xml.new"
                   (lambda (port)
                     (sxml->xml
                      (pre-post-order
                       (with-input-from-file "build.xml"
                         (lambda _ (xml->sxml #:trim-whitespace? #t)))
                       rules)
                      port))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
11594
;; Forester 1.005, built from the sources jar published on Maven Central.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://repo1.maven.org/maven2/"
                           "org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32
         "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml, copy in the
         ;; two resource files supplied as native inputs, and point the
         ;; build file at those local copies.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rules
                    `(;; Remove all unjar tags to avoid repacking classes.
                      (unjar . ,(lambda _ '()))
                      ;; Every other element and all text pass through
                      ;; unchanged.
                      (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                      (*text* . ,(lambda (_ txt) txt)))))
               (call-with-output-file "build.xml"
                 (lambda (port)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "src/build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     rules)
                    port))))
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; The first replacement ends by opening a Java block comment;
             ;; the other two substitutions insert the closing "*/".
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _ (delete-file-recursively "src/classes") #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/"
                               "synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
11690
;; Core module of BioJava, built from the "biojava-core" subdirectory of the
;; upstream repository.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             ;; Releases are tagged "biojava-<version>".
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude (list "**/SearchIOTest.java"
                            "**/BlastXMLParserTest.java"
                            "**/GenbankCookbookTest.java"
                            "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, ahead of the test run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (home-page "https://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
11746
;; Phylo module of BioJava.  Fields not overridden here (version, source and
;; license among them) come from java-biojava-core.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, ahead of the test run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (home-page "https://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
11782
;; Alignment module of BioJava.  Fields not overridden here (version, source
;; and license among them) come from java-biojava-core.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, ahead of the test run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (home-page "https://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
11826
;; BioJava core at version 4.0.0: same recipe as java-biojava-core, with only
;; the version and the source checkout replaced.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             ;; Releases are tagged "biojava-<version>".
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
11840
;; Phylo module matching java-biojava-core-4.0.  It propagates the 4.0 core
;; package and java-forester-1.005.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, ahead of the test run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (home-page "https://biojava.org")
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
11876
;; Alignment module matching java-biojava-core-4.0.  It propagates the 4.0
;; core and phylo packages and java-forester-1.005.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources, ahead of the test run.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (home-page "https://biojava.org")
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
11920
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             (let* ((wanted? (lambda (label)
                               ;; Every "java-" input except java-testng.
                               (and (string-prefix? "java-" label)
                                    (not (string=? label "java-testng")))))
                    (dirs (filter-map (match-lambda
                                        ((label . dir)
                                         (and (wanted? label) dir)))
                                      inputs))
                    (jars (append-map (lambda (dir)
                                        (find-files dir "\\.jar$"))
                                      dirs)))
               (for-each (lambda (jar)
                           (symlink jar
                                    (string-append "jar/lib/"
                                                   (basename jar))))
                         jars))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin     (string-append out "/bin"))
                    (share   (string-append out "/share/java/"))
                    (lib     (string-append share "/lib/"))
                    (scripts '("BAMTagHistogram"
                               "BAMTagofTagCounts"
                               "BaseDistributionAtReadPosition"
                               "CollapseBarcodesInPlace"
                               "CollapseTagWithContext"
                               "ConvertToRefFlat"
                               "CreateIntervalsFiles"
                               "DetectBeadSynthesisErrors"
                               "DigitalExpression"
                               "Drop-seq_alignment.sh"
                               "FilterBAM"
                               "FilterBAMByTag"
                               "GatherGeneGCLength"
                               "GatherMolecularBarcodeDistributionByGene"
                               "GatherReadQualityMetrics"
                               "PolyATrimmer"
                               "ReduceGTF"
                               "SelectCellsByNumTranscripts"
                               "SingleCellRnaSeqMetricsCollector"
                               "TagBamWithReadSequenceExtended"
                               "TagReadWithGeneExon"
                               "TagReadWithInterval"
                               "TrimStartingSequence"
                               "ValidateReference")))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; In the installed copies, replace a leading "java" with the
               ;; result of (which "java") and set jar_deploy_dir to the
               ;; share/java directory of this output.
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    (lib   (string-append share "/lib/")))
               ;; Recreate each link from jar/lib in the output, pointing at
               ;; the link's own target.
               (for-each (lambda (link)
                           (symlink (readlink link)
                                    (string-append lib (basename link))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12061
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "0z3hr120wk2vrlmlpz1vp3n9wy3rq4y2mnzh2vf08qgqn2xfdwcw"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               ;; Keep the trim_galore entry but drop whatever follows it on
               ;; the same line; the other two entries are removed.
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star-for-pigx)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
12130
;; Package definition for the PiGx ChIP-seq pipeline.  The source is the
;; release tarball published on the project's GitHub releases page.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.51")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32 "0bb6hzpl0qq0jd57pgd1m5ns547rfipr6071a4m12vxlm4nlpi5q"))))
    (build-system gnu-build-system)
    (arguments
     ;; Tests are disabled: parts of them rely on access to the network.
     `(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ;; R and the R packages.
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-corrplot" ,r-corrplot)
       ("r-data-table" ,r-data-table)
       ("r-deseq2" ,r-deseq2)
       ("r-dplyr" ,r-dplyr)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler2" ,r-gprofiler2)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-jsonlite" ,r-jsonlite)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsubread" ,r-rsubread)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ;; Python and the Python modules.
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ;; Remaining tools.
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
12207
;; Package definition for the PiGx bisulfite sequencing pipeline, built from
;; the upstream release tarball.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                           "releases/download/v" version
                           "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32 "0mpzlay2d5cjpmrcp7knff6rg1c2mqszd638n7lw0mc0cycbp9f8"))))
    (build-system gnu-build-system)
    (arguments
     `(;; TODO: tests currently require 12+GB of RAM.  See
       ;; https://github.com/BIMSBbioinfo/pigx_bsseq/issues/164
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; The readr package is picky about timezones, so TZ and TZDIR are
         ;; set up before the check phase, with TZDIR pointing into the
         ;; "tzdata" input.
         (add-before 'check 'set-timezone
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo)
               #t))))))
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ;; R and the R packages.
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-ggrepel" ,r-ggrepel)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ;; Python and the remaining tools.
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("bowtie" ,bowtie)
       ("bwa-meth" ,bwa-meth)
       ("fastqc" ,fastqc)
       ("methyldackel" ,methyldackel)
       ("multiqc" ,multiqc)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samblaster" ,samblaster)
       ("samtools" ,samtools)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
12274
;; Package definition for the PiGx single-cell RNA-seq pipeline, built from
;; the upstream release tarball with the default GNU build phases.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "1.1.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                           "releases/download/v" version
                           "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32 "1h5mcxzwj3cidlkvy9ly5wmi48vwfsjf8dxjfirknqxr9a92hwlx"))))
    (build-system gnu-build-system)
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("fastqc" ,fastqc)
       ("flexbar" ,flexbar)
       ;; The "java" label is bound to IcedTea 8.
       ("java" ,icedtea-8)
       ("jellyfish" ,jellyfish)
       ;; Python and the Python modules.
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake)
       ;; The "star" label is bound to the dedicated star-for-pigx variant.
       ("star" ,star-for-pigx)
       ;; R and the R packages.
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-seurat" ,r-seurat)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
12340
;; Package definition for PiGx itself; its inputs are Python and the four
;; individual PiGx pipeline packages.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                           "releases/download/v" version
                           "/pigx-" version ".tar.gz"))
       (sha256
        (base32 "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python)
       ;; The individual pipelines.
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
12376
;; Package definition for Genrich, fetched from the upstream Git tag
;; "v<version>".  The configure phase is dropped and the install phase is
;; replaced by a manual copy of the "Genrich" binary.
(define-public genrich
  (package
    (name "genrich")
    (version "0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/jsh58/Genrich")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy the built binary into <out>/bin.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "Genrich" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/jsh58/Genrich")
    (synopsis "Detecting sites of genomic enrichment")
    (description "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
    (license license:expat)))
12409
;; Package definition for Mantis, pinned to a specific upstream commit and
;; built with CMake.
(define-public mantis
  (let ((commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84")
        (revision "1"))
    (package
      (name "mantis")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/splatlab/mantis")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
      (build-system cmake-build-system)
      (arguments
       ;; There are no tests.
       `(#:tests? #f))
      (inputs
       `(("sdsl-lite" ,sdsl-lite)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      ;; Restricted to x86_64: the code uses __uint128_t and inline assembly.
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3))))
12444
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full")
               (commit commit)))
         ;; Expands to "<name>-<version>-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32 "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, patch the makefile:
           ;; point the include flag at the samtools headers and add
           ;; -lpthread after -lz.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((include-flag
                      (string-append "-I" (assoc-ref inputs "samtools")
                                     "/include/samtools")))
                 (substitute* "makefile"
                   (("-I \\$\\{SAMTOOLS_DIR\\}") include-flag)
                   (("-lz ") "-lz -lpthread "))
                 #t)))
           ;; Copy the three executables into <out>/bin.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (tool)
                             (install-file tool bin))
                           '("j_count" "b_count" "sjcount"))
                 #t))))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
12495
;; Package definition for minimap2, built from the upstream release tarball.
;; There is no configure script or install target: the build is driven by
;; make flags selected per architecture, and the binary and man page are
;; installed by hand.
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "0hi7i9pzxhvjj44khzzzj1lrn5gb5837arr4wgln7k1k5n4ci2mn"))
       (patches (search-patches "minimap2-aarch64-support.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       ;; SYSTEM is the cross-compilation target when there is one, and the
       ;; native system otherwise.
       (let ((system ,(or (%current-target-system)
                          (%current-system))))
         (cons (string-append "CC=" ,(cc-for-target))
               (cond
                ((string-prefix? "x86_64" system)
                 '("all"))
                ((or (string-prefix? "i586" system)
                     (string-prefix? "i686" system))
                 '("sse2only=1"))
                ;; Matches the native system name "armhf-linux" as well as
                ;; the cross-compilation triplet "arm-linux-gnueabihf".
                ((string-prefix? "arm" system)
                 '("arm_neon=1"))
                ((string-prefix? "aarch64" system)
                 '("aarch64=1"))
                ;; No architecture-specific flag is known; do not hand make
                ;; an empty-string argument.
                (else '()))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; install-file creates the target directories as needed.
               (install-file "minimap2" bin)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
12559
;; Package definition for miniasm, fetched from the upstream Git tag
;; "v<version>".  The configure phase is dropped and the two executables are
;; installed by hand.
(define-public miniasm
  (package
    (name "miniasm")
    (version "0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/miniasm")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "04dv5wv8bhsw1imxwyd438bnn9kby7svp44nbcz8lsadzjjci5gs"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Only the outputs are needed here; the remaining phase
           ;; arguments are absorbed by #:allow-other-keys.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "miniasm" bin)
               (install-file "minidot" bin)
               #t))))))
    (home-page "https://github.com/lh3/miniasm")
    (synopsis "Ultrafast de novo assembly for long noisy reads")
    (description "Miniasm is a very fast OLC-based de novo assembler for noisy
long reads. It takes all-vs-all read self-mappings (typically by minimap) as
input and outputs an assembly graph in the GFA format. Different from
mainstream assemblers, miniasm does not have a consensus step. It simply
concatenates pieces of read sequences to generate the final unitig sequences.
Thus the per-base error rate is similar to the raw input reads.")
    (license license:expat)))
12596
;; Package definition for Bandage, fetched from the upstream Git tag
;; "v<version>" and built with the Qt build system.  The configure, check
;; and install phases are all replaced.
(define-public bandage
  (package
    (name "bandage")
    (version "0.8.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/rrwick/Bandage")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv"))))
    (build-system qt-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Generate the build files with qmake.
         (replace 'configure
           (lambda _
             (invoke "qmake" "Bandage.pro")))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Make the test script use the binary in the build directory.
               (let ((binary (string-append (getcwd) "/Bandage")))
                 (substitute* "tests/bandage_command_line_tests.sh"
                   (("^bandagepath=.*")
                    (string-append "bandagepath=" binary "\n"))))
               ;; Run the script from within "tests", with XDG_RUNTIME_DIR
               ;; set to that directory.
               (with-directory-excursion "tests"
                 (setenv "XDG_RUNTIME_DIR" (getcwd))
                 (invoke "./bandage_command_line_tests.sh")))
             #t))
         ;; Copy the executable into <out>/bin.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "Bandage"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     `(("qtbase" ,qtbase)
       ("qtsvg" ,qtsvg)))
    (native-inputs
     `(("imagemagick" ,imagemagick)))
    (home-page "https://rrwick.github.io/Bandage/")
    (synopsis
     "Bioinformatics Application for Navigating De novo Assembly Graphs Easily")
    (description "Bandage is a program for visualising de novo assembly graphs.
It allows users to interact with the assembly graphs made by de novo assemblers
such as Velvet, SPAdes, MEGAHIT and others. De novo assembly graphs contain not
only assembled contigs but also the connections between those contigs, which
were previously not easily accessible. Bandage visualises assembly graphs, with
connections, using graph layout algorithms. Nodes in the drawn graph, which
represent contigs, can be automatically labelled with their ID, length or depth.
Users can interact with the graph by moving, labelling and colouring nodes.
Sequence information can also be extracted directly from the graph viewer. By
displaying connections between contigs, Bandage opens up new possibilities for
analysing and improving de novo assemblies that are not possible by looking at
contigs alone.")
    (license (list license:gpl2+        ; bundled ogdf
                   license:gpl3+))))
12654
;; Package definition for the ciRcus R package, fetched from the upstream
;; Git tag "v<version>" and built with the R build system.
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
12696
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         ;; Points at the directory populated by 'copy-gclib-source below.
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the gclib sources from the "gclib-source" input into a
           ;; "gclib" subdirectory of the build tree.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; The checkout is named after the plain version string, so only
          ;; the version and the commit are bound here.
          ,(let ((version "0.10.3")
                 (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
12757
(define-public find-circ
  ;; The last release was in 2015.  The license was clarified in 2017, so we
  ;; take the latest commit.
  (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d")
        (revision "1"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                     (pythonpath (getenv "PYTHONPATH")))
                 ;; Copy SCRIPT into <out>/bin and wrap it so that the
                 ;; PYTHONPATH of the build environment is prefixed at run
                 ;; time.
                 (define (install-and-wrap script)
                   (install-file script bin)
                   (wrap-program (string-append bin "/" script)
                     `("PYTHONPATH" ":" prefix (,pythonpath))))
                 (for-each install-and-wrap
                           '("cmp_bed.py"
                             "find_circ.py"
                             "maxlength.py"
                             "merge_bed.py"
                             "unmapped2anchors.py")))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
12809
;; Package definition for Scanpy, fetched from PyPI.  The check phase is
;; replaced so that tests which cannot run in the build environment are
;; removed before pytest is invoked.
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "0s2b6cvaigx4wzw3850qb93sjwwxbzh22kpbp498zklc5rjpbz4l"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           ;; Honour #:tests? so that the test suite can be switched off.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; These tests require Internet access.
               (delete-file-recursively "scanpy/tests/notebooks")
               (delete-file "scanpy/tests/test_clustering.py")
               (delete-file "scanpy/tests/test_datasets.py")

               ;; TODO: I can't get the plotting tests to work, even with Xvfb.
               (delete-file "scanpy/tests/test_plotting.py")
               (delete-file "scanpy/tests/test_preprocessing.py")
               (delete-file "scanpy/tests/test_read_10x.py")

               ;; Put the source tree first on PYTHONPATH before running
               ;; pytest.
               (setenv "PYTHONPATH"
                       (string-append (getcwd) ":"
                                      (getenv "PYTHONPATH")))
               (invoke "pytest"))
             #t)))))
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-h5py" ,python-h5py)
       ("python-igraph" ,python-igraph)
       ("python-joblib" ,python-joblib)
       ("python-legacy-api-wrap" ,python-legacy-api-wrap)
       ("python-louvain" ,python-louvain)
       ("python-matplotlib" ,python-matplotlib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-packaging" ,python-packaging)
       ("python-pandas" ,python-pandas)
       ("python-patsy" ,python-patsy)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-statsmodels" ,python-statsmodels)
       ("python-tables" ,python-tables)
       ("python-tqdm" ,python-tqdm)
       ("python-umap-learn" ,python-umap-learn)))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://github.com/theislab/scanpy")
    ;; Synopses do not end with a period.
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
12874
;; Package definition for BBKNN, fetched from PyPI and built with the Python
;; build system.
(define-public python-bbknn
  (package
    (name "python-bbknn")
    (version "1.3.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bbknn" version))
       (sha256
        (base32 "1jbsh01f57zj4bhvjr3jh4532zznqd6nccmgrl3qi9gnhkf7c4y0"))))
    (build-system python-build-system)
    (arguments
     ;; TODO: Enable after migration to scikit-learn.
     `(#:tests? #f))
    (propagated-inputs
     `(("python-annoy" ,python-annoy)
       ("python-cython" ,python-cython)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-umap-learn" ,python-umap-learn)))
    (home-page "https://github.com/Teichlab/bbknn")
    (synopsis "Batch balanced KNN")
    (description "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow. It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches. BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups. This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
    (license license:expat)))
12908
;; Package definition for dRep, fetched from PyPI and built with the default
;; phases of the Python build system.
(define-public python-drep
  (package
    (name "python-drep")
    (version "3.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "drep" version))
       (sha256
        (base32 "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77"))))
    (build-system python-build-system)
    ;; Note that pytest is listed among the propagated inputs here.
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pytest" ,python-pytest)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-seaborn" ,python-seaborn)
       ("python-tqdm" ,python-tqdm)))
    (home-page "https://github.com/MrOlm/drep")
    (synopsis "De-replication of microbial genomes assembled from multiple samples")
    (description
     "dRep is a Python program for rapidly comparing large numbers of genomes.
dRep can also \"de-replicate\" a genome set by identifying groups of highly
similar genomes and choosing the best representative genome for each genome
set.")
    (license license:expat)))
12938
;; Package definition for inStrain, fetched from PyPI and built with the
;; Python build system.  One extra phase rewrites two imports in
;; docker/run_instrain.py into relative imports.
(define-public instrain
  (package
    (name "instrain")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "inStrain" version))
       (sha256
        (base32
         "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-relative-imports
           (lambda _
             (substitute* "docker/run_instrain.py"
               (("from s3_utils")
                "from .s3_utils")
               (("from job_utils")
                "from .job_utils"))
             ;; substitute* returns an unspecified value; return #t
             ;; explicitly, like the other custom phases in this file.
             #t)))))
    (inputs
     `(("python-biopython" ,python-biopython)
       ("python-boto3" ,python-boto3)
       ("python-h5py" ,python-h5py)
       ("python-lmfit" ,python-lmfit)
       ("python-matplotlib" ,python-matplotlib)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-psutil" ,python-psutil)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-seaborn" ,python-seaborn)
       ("python-tqdm" ,python-tqdm)
       ;; drep is needed for deprecated plot utilities
       ("python-drep" ,python-drep)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/MrOlm/inStrain")
    (synopsis "Calculation of strain-level metrics")
    (description
     "inStrain is a Python program for analysis of co-occurring genome
populations from metagenomes that allows highly accurate genome comparisons,
analysis of coverage, microdiversity, and linkage, and sensitive SNP detection
with gene localization and synonymous non-synonymous identification.")
    ;; The tool itself says that the license is "MIT", but the repository
    ;; contains a LICENSE file with the GPLv3.
    ;; See https://github.com/MrOlm/inStrain/issues/51
    (license license:expat)))
12991
(define-public gffcompare
  ;; Pinned to a specific upstream commit; the version string is derived from
  ;; the base version, REVISION and COMMIT via 'git-version'.
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place a copy of the gclib sources in "../gclib", next to the
           ;; gffcompare source tree, before the build starts.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           ;; Only the "gffcompare" executable is installed, into "bin".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; 'let*' is required here: with a plain 'let' the VERSION
          ;; initializer would see the enclosing gffcompare COMMIT and
          ;; REVISION, so the gclib checkout would be named after the wrong
          ;; commit.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
13054
(define-public intervaltree
  ;; Pinned to a specific commit; "0.0.0" is used as the base version.
  (let ((revision "1")
        (commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5"))
    (package
      (name "intervaltree")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/intervaltree/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
      (build-system gnu-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; There is no configure phase.
           (delete 'configure))
         ;; Install below the output directory.
         #:make-flags
         (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
               "DESTDIR=\"\"")
         ;; No tests.
         #:tests? #f))
      (home-page "https://github.com/ekg/intervaltree")
      (synopsis "Minimal C++ interval tree implementation")
      (description "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval. This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
      (license license:expat))))
13084
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "3.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "intervaltree" version))
       (sha256
        (base32
         "0wz234g6irlm4hivs2qzmnywk0ss06ckagwh15nflkyb3p462kyb"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; pytest appears to verify that the code under test is the
         ;; checked-out copy rather than an installed, possibly stale one.
         ;; The package has just been installed, so the mismatch is harmless;
         ;; the check is turned off so that tests are not skipped entirely.
         (add-before 'check 'import-mismatch-error-workaround
           (lambda _
             (setenv "PY_IGNORE_IMPORTMISMATCH" "1")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
13122
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.7")
    ;; Fetch from git: the PyPI tarball lacks the makefile that is needed to
    ;; build the programs.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/4dn-dcic/pairix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1snr3lrmsld8sy77ng6ba6wcmd33xjccf1l2f3m6pi29xis9nd6p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run "make" ahead of the Python build.
         (add-before 'build 'build-programs
           (lambda _
             (invoke "make")))
         ;; Copy everything found in "bin" to the output's "bin" directory.
         (add-after 'install 'install-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin")))
               (copy-recursively "bin" target))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
13159
(define-public python-pyfaidx
  ;; Fetched from PyPI and built with the Python build system; python-six is
  ;; propagated to users of this package.
  (package
    (name "python-pyfaidx")
    (version "0.5.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyfaidx" version))
       (sha256
        (base32 "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
13180
;; Variant of python-pyfaidx produced by 'package-with-python2'.
(define-public python2-pyfaidx (package-with-python2 python-pyfaidx))
13183
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.8.7")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cooler" version))
       (sha256
        (base32 "01g6gqix9ba27sappz6nfyiwabzrlf8i5fn8kwcz8ra356cq9crp"))))
    (build-system python-build-system)
    ;; Build-time only inputs.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-pytest" ,python-pytest)))
    ;; Inputs that are propagated to users of this package.
    (propagated-inputs
     `(("python-asciitree" ,python-asciitree)
       ("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)
       ("python-scipy" ,python-scipy)
       ("python-simplejson" ,python-simplejson)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
13222
(define-public python-hicmatrix
  (package
    (name "python-hicmatrix")
    (version "12")
    (source
     (origin
       ;; Fetched from git because version 12 is not available on pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCMatrix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1xhdyx16f3brgxgxybixdi64ki8nbbkq5vk4h9ahi11pzpjfn1pj"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the exact "cooler 0.8.5" pin with "cooler==0.8.*" in both
         ;; files that declare requirements.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* (list "requirements.txt" "setup.py")
               (("cooler *=+ *0.8.5")
                "cooler==0.8.*"))
             #t)))))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)
       ("python-intervaltree" ,python-intervaltree)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (home-page "https://github.com/deeptools/HiCMatrix/")
    (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
    (description
     "This helper package implements the @code{HiCMatrix} class for
the HiCExplorer and pyGenomeTracks packages.")
    (license license:gpl3+)))
13262
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn every exact "==" requirement in setup.py into a ">=" lower
         ;; bound.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
13315
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "3.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyGenomeTracks" version))
       (sha256
        (base32 "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Accept any matplotlib from 3.1.1 on instead of exactly 3.1.1.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("matplotlib ==3.1.1")
                "matplotlib >=3.1.1"))
             #t)))
       ;; There are no tests.
       #:tests? #f))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-future" ,python-future)
       ("python-gffutils" ,python-gffutils)
       ("python-hicmatrix" ,python-hicmatrix)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-tqdm" ,python-tqdm)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
13358
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hic2cool" version))
       (sha256
        (base32 "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    ;; No tests are included, so the test suite is not run.
    (arguments
     '(#:tests? #f))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
13382
(define-public r-pore
  (package
    (name "r-pore")
    (version "0.24")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is stored under a per-version directory on SourceForge.
       (uri (string-append "mirror://sourceforge/rpore/" version
                           "/poRe_" version ".tar.gz"))
       (sha256
        (base32 "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    ;; Upstream spells the package name "poRe".
    (properties `((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bit64" ,r-bit64)
       ("r-data-table" ,r-data-table)
       ("r-rhdf5" ,r-rhdf5)
       ("r-shiny" ,r-shiny)
       ("r-svdialogs" ,r-svdialogs)))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; The software is free, but which license variant applies is unclear;
    ;; see <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
13411
(define-public r-xbioc
  ;; Pinned to a specific upstream commit.
  (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.16" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-assertthat" ,r-assertthat)
         ("r-biobase" ,r-biobase)
         ("r-biocmanager" ,r-biocmanager)
         ("r-digest" ,r-digest)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-reshape2" ,r-reshape2)
         ("r-stringr" ,r-stringr)))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
13444
(define-public r-cssam
  ;; Pinned to a specific upstream commit.
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-formula" ,r-formula)
         ("r-ggplot2" ,r-ggplot2)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Any version of the LGPL.
      (license license:lgpl2.1+))))
13475
(define-public r-bseqsc
  ;; Pinned to a specific upstream commit.
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-abind" ,r-abind)
         ("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-cssam" ,r-cssam)
         ("r-dplyr" ,r-dplyr)
         ("r-e1071" ,r-e1071)
         ("r-edger" ,r-edger)
         ("r-ggplot2" ,r-ggplot2)
         ("r-nmf" ,r-nmf)
         ("r-openxlsx" ,r-openxlsx)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-preprocesscore" ,r-preprocesscore)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)
         ("r-stringr" ,r-stringr)
         ("r-xbioc" ,r-xbioc)))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
      (description "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
13520
(define-public porechop
  ;; Upstream recommends installing from a clone of the git repository:
  ;; https://github.com/rrwick/Porechop#installation
  (let ((revision "1")
        (commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
13550
(define-public poretools
  ;; The latest release dates from 2016 and the latest commit from 2017.
  ;; Upstream recommends installing from a clone of the git repository:
  ;; https://poretools.readthedocs.io/en/latest/content/installation.html
  (let ((revision "1")
        (commit "e426b1f09e86ac259a00c261c79df91510777407"))
    (package
      (name "poretools")
      (version (git-version "0.6.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/poretools")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
      (build-system python-build-system)
      ;; Python >=2.7, <3.0 is required, and the Python dependencies must
      ;; match, hence the python2-* variants below.
      (arguments
       `(#:python ,python-2))
      (inputs
       `(("hdf5" ,hdf5)))
      (propagated-inputs
       `(("python-dateutil" ,python2-dateutil)
         ("python-h5py" ,python2-h5py)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-seaborn" ,python2-seaborn)))
      (home-page "https://poretools.readthedocs.io")
      (synopsis "Toolkit for working with nanopore sequencing data")
      (description
       "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
This @code{poretools} package is a flexible toolkit for exploring datasets
generated by nanopore sequencing devices for the purposes of quality control and
downstream analysis. Poretools operates directly on the native FAST5, a variant
of the Hierarchical Data Format (HDF5) standard.")
      (license license:expat))))
13589
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM")
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Nothing is configured or compiled; the scripts are only installed.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (libexec (string-append out "/libexec/jamm"))
                    ;; The two shell scripts that end up in "bin".
                    (launchers '("JAMM.sh" "SignalGenerator.sh"))
                    ;; The "bin" directory of the input labelled LABEL.
                    (input-bin (lambda (label)
                                 (string-append (assoc-ref inputs label)
                                                "/bin"))))
               ;; Point the launchers at the directory holding the helpers.
               (substitute* launchers
                 (("^sPath=.*")
                  (string-append "sPath=\"" libexec "\"\n")))
               ;; Helper scripts go to libexec.
               (for-each (lambda (helper)
                           (install-file helper libexec))
                         '("bincalculator.r"
                           "peakfinder.r"
                           "peakhelper.r"
                           "signalmaker.r"
                           "xcorr.r"
                           "xcorrhelper.r"
                           ;; Perl scripts
                           "peakfilter.pl"
                           "readshifter.pl"))
               ;; Make each launcher executable, install it, and wrap it so
               ;; that it sees the tools and libraries it was built with.
               (for-each
                (lambda (launcher)
                  (chmod launcher #o555)
                  (install-file launcher bin)
                  (wrap-program (string-append bin "/" launcher)
                    (list "PATH" ":" 'prefix
                          (map input-bin
                               '("coreutils" "gawk" "perl" "r-minimal")))
                    (list "PERL5LIB" ":" 'prefix
                          (list (getenv "PERL5LIB")))
                    (list "R_LIBS_SITE" ":" 'prefix
                          (list (getenv "R_LIBS_SITE")))))
                launchers)
               #t))))))
    (inputs
     `(("bash" ,bash)
       ("coreutils" ,coreutils)
       ("gawk" ,gawk)
       ("perl" ,perl)
       ("r-minimal" ,r-minimal)
       ;;("r-parallel" ,r-parallel)
       ("r-signal" ,r-signal)
       ("r-mclust" ,r-mclust)))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
13663
(define-public ngless
  (package
    (name "ngless")
    (version "1.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/ngless/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
    (build-system haskell-build-system)
    (arguments
     `(;; The haddock phase fails with:
       ;;   NGLess/CmdArgs.hs:20:1: error: parse error on input import
       ;;   import Options.Applicative
       #:haddock? #f
       #:phases
       (modify-phases %standard-phases
         ;; Record the versions of the packaged tools in the Makefile, then
         ;; let make generate the Versions.hs module from it.
         (add-after 'unpack 'create-Versions.hs
           (lambda _
             (substitute* "Makefile"
               (("BWA_VERSION = .*")
                ,(string-append "BWA_VERSION = "
                                (package-version bwa) "\n"))
               (("SAM_VERSION = .*")
                ,(string-append "SAM_VERSION = "
                                (package-version samtools) "\n"))
               (("PRODIGAL_VERSION = .*")
                ,(string-append "PRODIGAL_VERSION = "
                                (package-version prodigal) "\n"))
               (("MINIMAP2_VERSION = .*")
                ,(string-append "MINIMAP2_VERSION = "
                                (package-version minimap2) "\n")))
             (invoke "make" "NGLess/Dependencies/Versions.hs")
             #t))
         ;; Run hpack once Versions.hs exists.
         (add-after 'create-Versions.hs 'create-cabal-file
           (lambda _
             (invoke "hpack")
             #t))
         ;; These tools are expected to be installed alongside ngless, under
         ;; the name "ngless-VERSION-TOOL".
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((link-prefix (string-append (assoc-ref outputs "out")
                                               "/bin/"
                                               "ngless-" ,version "-")))
               (for-each
                (lambda (tool)
                  (symlink (string-append (assoc-ref inputs tool)
                                          "/bin/" tool)
                           (string-append link-prefix tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))
               #t))))))
    (inputs
     `(("prodigal" ,prodigal)
       ("bwa" ,bwa)
       ("samtools" ,samtools)
       ("minimap2" ,minimap2)
       ("ghc-aeson" ,ghc-aeson)
       ("ghc-ansi-terminal" ,ghc-ansi-terminal)
       ("ghc-async" ,ghc-async)
       ("ghc-atomic-write" ,ghc-atomic-write)
       ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
       ("ghc-conduit" ,ghc-conduit)
       ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
       ("ghc-conduit-extra" ,ghc-conduit-extra)
       ("ghc-configurator" ,ghc-configurator)
       ("ghc-convertible" ,ghc-convertible)
       ("ghc-data-default" ,ghc-data-default)
       ("ghc-diagrams-core" ,ghc-diagrams-core)
       ("ghc-diagrams-lib" ,ghc-diagrams-lib)
       ("ghc-diagrams-svg" ,ghc-diagrams-svg)
       ("ghc-double-conversion" ,ghc-double-conversion)
       ("ghc-edit-distance" ,ghc-edit-distance)
       ("ghc-either" ,ghc-either)
       ("ghc-errors" ,ghc-errors)
       ("ghc-extra" ,ghc-extra)
       ("ghc-filemanip" ,ghc-filemanip)
       ("ghc-file-embed" ,ghc-file-embed)
       ("ghc-gitrev" ,ghc-gitrev)
       ("ghc-hashtables" ,ghc-hashtables)
       ("ghc-http-conduit" ,ghc-http-conduit)
       ("ghc-inline-c" ,ghc-inline-c)
       ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
       ("ghc-intervalmap" ,ghc-intervalmap)
       ("ghc-missingh" ,ghc-missingh)
       ("ghc-optparse-applicative" ,ghc-optparse-applicative)
       ("ghc-regex" ,ghc-regex)
       ("ghc-safe" ,ghc-safe)
       ("ghc-safeio" ,ghc-safeio)
       ("ghc-strict" ,ghc-strict)
       ("ghc-tar" ,ghc-tar)
       ("ghc-tar-conduit" ,ghc-tar-conduit)
       ("ghc-unliftio" ,ghc-unliftio)
       ("ghc-unliftio-core" ,ghc-unliftio-core)
       ("ghc-vector" ,ghc-vector)
       ("ghc-yaml" ,ghc-yaml)
       ("ghc-zlib" ,ghc-zlib)))
    (propagated-inputs
     `(("r-r6" ,r-r6)
       ("r-hdf5r" ,r-hdf5r)
       ("r-iterators" ,r-iterators)
       ("r-itertools" ,r-itertools)
       ("r-matrix" ,r-matrix)))
    (native-inputs
     `(("ghc-hpack" ,ghc-hpack)
       ("ghc-quickcheck" ,ghc-quickcheck)
       ("ghc-test-framework" ,ghc-test-framework)
       ("ghc-test-framework-hunit" ,ghc-test-framework-hunit)
       ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
       ("ghc-test-framework-th" ,ghc-test-framework-th)))
    (home-page "https://gitlab.com/ngless/ngless")
    (synopsis "DSL for processing next-generation sequencing data")
    (description "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
13784
(define-public filtlong
  ;; Upstream recommends installing from a clone of the git repository
  ;; (https://github.com/rrwick/Filtlong#installation), and the latest
  ;; release is more than nine months old.
  (let ((revision "1")
        (commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Install the executable to "bin" and both helper scripts to
           ;; "share/filtlong/scripts".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (script-dir (string-append out
                                                 "/share/filtlong/scripts")))
                 (install-file "bin/filtlong" (string-append out "/bin"))
                 (for-each (lambda (script)
                             (install-file script script-dir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))
               #t))
           ;; Wrap the installed histogram.py so that it runs with the
           ;; PYTHONPATH of the build environment.
           (add-after 'install 'wrap-program
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (wrap-program (string-append
                              (assoc-ref outputs "out")
                              "/share/filtlong/scripts/histogram.py")
                 (list "PYTHONPATH" ":" 'prefix
                       (list (getenv "PYTHONPATH"))))
               #t))
           ;; Replace "awk" in the helper script with the file name that
           ;; (which "gawk") returns.
           (add-before 'check 'patch-tests
             (lambda _
               (substitute* "scripts/read_info_histograms.sh"
                 (("awk") (which "gawk")))
               #t)))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0))))) ;histogram.py
13844
(define-public nanopolish
  ;; The recommended way to install is to clone the git repository
  ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  ;; Also, the differences between release and current version seem to be
  ;; significant.
  (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.11.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish")
               (commit commit)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Remove the "htslib" directory from the checkout; htslib is
             ;; provided as a package input below.
             (delete-file-recursively "htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         ;; NOTE(review): "noinstall" presumably tells the Makefile to use
         ;; the libraries from the inputs instead of fetching its own copies
         ;; -- confirm against the upstream Makefile.
         `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Prepend eigen's "include/eigen3" directory to CPATH.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "CPATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3:"
                                      (or (getenv "CPATH") "")))
               #t))
           (delete 'configure)
           ;; Copy the "nanopolish" binary to bin/ and every file found
           ;; under "scripts" (flattened) to share/nanopolish/scripts.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (string-append out "/share/nanopolish/scripts")))

                 (install-file "nanopolish" bin)
                 (for-each (lambda (file) (install-file file scripts))
                           (find-files "scripts" ".*"))
                 #t)))
           ;; Wrap the installed Python and Perl scripts so they see the
           ;; PYTHONPATH and PERL5LIB values of the build environment.
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((pythonpath (getenv "PYTHONPATH"))
                     (perl5lib (getenv "PERL5LIB"))
                     (scripts (string-append (assoc-ref outputs "out")
                                             "/share/nanopolish/scripts")))
                 ;; The patterns are anchored with "$" so that only files
                 ;; whose names end in the extension are wrapped; an
                 ;; unanchored "\\.py" would also match e.g. "foo.pyc".
                 (for-each (lambda (file)
                             (wrap-program file
                               `("PYTHONPATH" ":" prefix (,pythonpath))))
                           (find-files scripts "\\.py$"))
                 (for-each (lambda (file)
                             (wrap-script file
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files scripts "\\.pl$"))
                 ;; Return #t like the other phases in this file.
                 #t))))))
      (inputs
       `(("guile" ,guile-3.0)           ; for wrappers
         ("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("bioperl" ,bioperl-minimal)
         ("perl-getopt-long" ,perl-getopt-long)
         ("python" ,python-wrapper)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))
13930
(define-public cnvkit
  ;; CNVkit, fetched from the upstream "v<version>" git tag and built with
  ;; the Python build system.
  (package
    (name "cnvkit")
    (version "0.9.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/etal/cnvkit")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
    (build-system python-build-system)
    ;; Everything below is propagated, i.e. installed alongside cnvkit.
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-future" ,python-future)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-reportlab" ,python-reportlab)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-scipy" ,python-scipy)
       ;; R packages
       ("r-dnacopy" ,r-dnacopy)))
    (home-page "https://cnvkit.readthedocs.org/")
    (synopsis "Copy number variant detection from targeted DNA sequencing")
    (description
     "CNVkit is a Python library and command-line software toolkit to infer
and visualize copy number from high-throughput DNA sequencing data. It is
designed for use with hybrid capture, including both whole-exome and custom
target panels, and short-read sequencing platforms such as Illumina and Ion
Torrent.")
    (license license:asl2.0)))
13966
(define-public python-pyfit-sne
  ;; Cython wrapper for FIt-SNE, fetched from the upstream git tag that is
  ;; named exactly after the version.
  (package
    (name "python-pyfit-sne")
    (version "1.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/pyFIt-SNE")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "13wh3qkzs56azmmgnxib6xfr29g7xh09sxylzjpni5j0pp0rc5qw"))))
    (build-system python-build-system)
    ;; Build-time only.
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("fftw" ,fftw)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/KlugerLab/pyFIt-SNE")
    (synopsis "FFT-accelerated Interpolation-based t-SNE")
    (description
     "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This package
is a Cython wrapper for FIt-SNE.")
    (license license:bsd-4)))
13996
(define-public bbmap
  ;; BBMap/BBTools: built with ant ("dist" target) plus a JNI helper
  ;; library built with make; shell front-ends are patched and installed by
  ;; a custom phase.
  (package
    (name "bbmap")
    (version "38.90")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
              (sha256
               (base32
                "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3"))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "dist"
       #:tests? #f                      ; there are none
       #:make-flags
       ;; Point ant at the mpi.jar shipped by the "java-openmpi" input.
       (list (string-append "-Dmpijar="
                            (assoc-ref %build-inputs "java-openmpi")
                            "/lib/mpi.jar"))
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils))
       #:phases
       (modify-phases %standard-phases
         ;; Run make on "makefile.linux" inside the "jni" directory.
         (add-after 'build 'build-jni-library
           (lambda _
             (with-directory-excursion "jni"
               (invoke "make" "-f" "makefile.linux"))))
         ;; There is no install target
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-scripts-and-documentation
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Replace the piped "awk" call in calcmem.sh with the absolute
             ;; file name returned by `which'.
             (substitute* "calcmem.sh"
               (("\\| awk ") (string-append "| " (which "awk") " ")))
             (let* ((shell-scripts (find-files "." "\\.sh$"))
                    (prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/bbmap"))
                    (jnidir (string-append prefix "/lib/jni")))
               ;; Rewrite the documentation directory, class path, native
               ;; library directory and java command in every shell script
               ;; so they refer to locations in the output / the store.
               (substitute* shell-scripts
                 (("\\$DIR\"\"docs") docdir)
                 (("^CP=.*")
                  (string-append "CP=" prefix "/share/java/BBTools.jar\n"))
                 (("^NATIVELIBDIR.*")
                  (string-append "NATIVELIBDIR=" jnidir "\n"))
                 (("CMD=\"java")
                  (string-append "CMD=\"" (which "java"))))
               (for-each (lambda (file) (install-file file bindir))
                         shell-scripts)

               ;; Install JNI library
               (install-file "jni/libbbtoolsjni.so" jnidir)

               ;; Install documentation
               (install-file "docs/readme.txt" docdir)
               (copy-recursively "docs/guides" docdir))
             #t)))
       #:jdk ,openjdk11))
    (inputs
     `(("gawk" ,gawk)
       ("java-eclipse-jdt-core" ,java-eclipse-jdt-core)
       ("java-eclipse-jdt-compiler-apt" ,java-eclipse-jdt-compiler-apt)
       ("java-openmpi" ,java-openmpi)))
    (home-page "https://sourceforge.net/projects/bbmap/")
    (synopsis "Aligner and other tools for short sequencing reads")
    (description
     "This package provides bioinformatic tools to align, deduplicate,
reformat, filter and normalize DNA and RNA-seq data. It includes the
following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
a kmer-based error-correction and normalization tool; Dedupe, a tool to
simplify assemblies by removing duplicate or contained subsequences that share
a target percent identity; Reformat, to convert reads between
fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
to an artifact/contaminant file.")
    (license license:bsd-3)))
14072
(define-public velvet
  ;; Velvet assembler, built from the upstream tarball with "make" (there
  ;; is no configure script) and installed by a custom phase.
  (package
    (name "velvet")
    (version "1.2.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
                                  "velvet_" version ".tgz"))
              (sha256
               (base32
                "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
              ;; Delete bundled libraries
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file "Manual.pdf")
                  (delete-file-recursively "third-party")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("OPENMP=t")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The snippet removed "third-party", so include zlib.h from the
         ;; "zlib" input rather than from the bundled copy.
         (add-after 'unpack 'fix-zlib-include
           (lambda _
             (substitute* "src/binarySequences.c"
               (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))
             #t))
         ;; Install the two executables and the two PDF manuals.
         ;; NOTE(review): "Manual.pdf" is deleted by the source snippet, so
         ;; it is presumably regenerated during the build -- confirm.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/velvet")))
               (mkdir-p bindir)
               (mkdir-p docdir)
               (for-each (lambda (program) (install-file program bindir))
                         '("velveth" "velvetg"))
               (for-each (lambda (manual) (install-file manual docdir))
                         '("Manual.pdf" "Columbus_manual.pdf"))
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-latex-graphics
                                        texlive-latex-hyperref)))))
    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
    (synopsis "Nucleic acid sequence assembler for very short reads")
    (description
     "Velvet is a de novo genomic assembler specially designed for short read
sequencing technologies, such as Solexa or 454. Velvet currently takes in
short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
    (license license:gpl2+)))
14130
(define-public python-velocyto
  ;; Velocyto, fetched from PyPI and built with the Python build system.
  (package
    (name "python-velocyto")
    (version "0.17.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "velocyto" version))
       (sha256
        (base32
         "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
    (build-system python-build-system)
    ;; Build-time only.
    (native-inputs
     `(("python-joblib" ,python-joblib)))
    ;; Installed alongside the package.
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-cython" ,python-cython)
       ("python-h5py" ,python-h5py)
       ("python-loompy" ,python-loompy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/velocyto-team/velocyto.py")
    (synopsis "RNA velocity analysis for single cell RNA-seq data")
    (description
     "Velocyto is a library for the analysis of RNA velocity. Velocyto
includes a command line tool and an analysis pipeline.")
    (license license:bsd-2)))
14163
(define-public arriba
  ;; Arriba gene fusion detector, built from the upstream release tarball.
  ;; There is no configure script; the 'configure phase is replaced by one
  ;; that patches the Makefile and the "run_arriba.sh" helper.
  (package
    (name "arriba")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/suhrig/arriba/releases/"
                           "download/v" version "/arriba_v" version ".tar.gz"))
       (sha256
        (base32
         "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Use the headers and the shared library of the "htslib"
             ;; input in place of the $(HTSLIB) references in the Makefile.
             (let ((htslib (assoc-ref inputs "htslib")))
               (substitute* "Makefile"
                 (("-I\\$\\(HTSLIB\\)/htslib")
                  (string-append "-I" htslib "/include/htslib"))
                 ((" \\$\\(HTSLIB\\)/libhts.a")
                  (string-append " " htslib "/lib/libhts.so"))))
             ;; Refer to STAR and samtools by the absolute file names
             ;; returned by `which'.
             (substitute* "run_arriba.sh"
               (("^STAR ") (string-append (which "STAR") " "))
               (("samtools --version-only")
                (string-append (which "samtools") " --version-only"))
               (("samtools index")
                (string-append (which "samtools") " index"))
               (("samtools sort")
                (string-append (which "samtools") " sort")))
             #t))
         ;; Install the binary and the two scripts to bin/, and wrap the R
         ;; script with the R_LIBS_SITE value of the build environment.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "arriba" bin)
               (install-file "run_arriba.sh" bin)
               (install-file "draw_fusions.R" bin)
               (wrap-program (string-append bin "/draw_fusions.R")
                 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("r-minimal" ,r-minimal)
       ("r-circlize" ,r-circlize)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("samtools" ,samtools)
       ("star" ,star)
       ("zlib" ,zlib)))
    (home-page "https://github.com/suhrig/arriba")
    ;; No trailing whitespace in the synopsis.
    (synopsis "Gene fusion detection from RNA-Seq data")
    (description
     "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
    ;; All code is under the Expat license with the exception of
    ;; "draw_fusions.R", which is under GPLv3.
    (license (list license:expat license:gpl3))))
14229
(define-public adapterremoval
  ;; AdapterRemoval, fetched from the upstream "v<version>" git tag and
  ;; built with plain "make" (the 'configure phase is deleted).
  (package
    (name "adapterremoval")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/MikkelSchubert/adapterremoval")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ;; PREFIX is set to the "out" output of this package.
       (list "COLOR_BUILD=no"
             (string-append "PREFIX="
                            (assoc-ref %outputs "out")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://adapterremoval.readthedocs.io/")
    (synopsis "Rapid sequence adapter trimming, identification, and read merging")
    (description
     "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
    (license license:gpl3+)))
14266
(define-public pplacer
  ;; `commit' is only used to fake the output of "git describe" in
  ;; myocamlbuild.ml (see the 'fix-makefile phase); the source itself is
  ;; fetched from the "v<version>" tag.
  (let ((commit "807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/matsen/pplacer")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Patch myocamlbuild.ml: add the "-unsafe-string" compile flag
           ;; and inject small string helpers in front of `run_and_read',
           ;; which are then used to post-process "ocamlfind list" output.
           (add-after 'unpack 'fix-build-with-latest-ocaml
             (lambda _
               (substitute* "myocamlbuild.ml"
                 (("dep \\[\"c_pam\"\\]" m)
                  (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
                                 m))
                 (("let run_and_read" m)
                  (string-append "
let split s ch =
let x = ref [] in
let rec go s =
let pos = String.index s ch in
x := (String.before s pos)::!x;
go (String.after s (pos + 1))
in
try go s
with Not_found -> !x
let split_nl s = split s '\\n'
let before_space s =
try String.before s (String.index s ' ')
with Not_found -> s

" m))
                 (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m)
                  (string-append "List.map before_space (split_nl & " m ")"))
                 ((" blank_sep_strings &") "")
                 ((" Lexing.from_string &") ""))
               #t))
           ;; Overwrite the C sources and headers in "cdd_src" with those
           ;; from the "lib-src" directory of the unpacked cddlib tarball.
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((tarball (assoc-ref inputs "cddlib-src"))
                      (unpack-dir "cddlib_guix"))
                 (mkdir unpack-dir)
                 (with-directory-excursion unpack-dir
                   (invoke "tar" "xvf" tarball))
                 ;; Element 2 of the directory listing is taken as the
                 ;; top-level directory of the tarball.
                 ;; NOTE(review): presumably entries 0 and 1 are "." and
                 ;; ".." -- confirm.
                 (let ((lib-src
                        (string-append unpack-dir "/"
                                       (list-ref (scandir unpack-dir) 2)
                                       "/lib-src")))
                   (for-each make-file-writable (find-files "cdd_src" ".*"))
                   (for-each
                    (lambda (source-file)
                      (copy-file source-file
                                 (string-append "cdd_src/"
                                                (basename source-file))))
                    (find-files lib-src ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; Copy the contents of "bin" into the output's bin/ directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin")))
                 (copy-recursively "bin" bindir))
               #t)))
         #:ocaml ,ocaml-4.07
         #:findlib ,ocaml4.07-findlib))
      (inputs
       `(("zlib" ,zlib "static")
         ("gsl" ,gsl)
         ("ocaml-ounit" ,(package-with-ocaml4.07 ocaml-ounit))
         ("ocaml-batteries" ,(package-with-ocaml4.07 ocaml-batteries))
         ("ocaml-camlzip" ,(package-with-ocaml4.07 camlzip))
         ("ocaml-csv" ,(package-with-ocaml4.07 ocaml-csv))
         ("ocaml-sqlite3" ,ocaml4.07-sqlite3)
         ("ocaml-xmlm" ,(package-with-ocaml4.07 ocaml-xmlm))
         ("ocaml-mcl" ,(package-with-ocaml4.07 ocaml-mcl))
         ("ocaml-gsl" ,ocaml4.07-gsl-1)))
      (native-inputs
       `(("cddlib-src" ,(package-source cddlib))
         ("ocamlbuild" ,(package-with-ocaml4.07 ocamlbuild))
         ("pkg-config" ,pkg-config)))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "https://matsen.fhcrc.org/pplacer/")
      (license license:gpl3))))
14382
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The Python code lives in the "scripts" sub-directory.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts") #t))
         (replace 'check
           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
         ;; Wrap the two scripts so that the bin/ directories of the inputs
         ;; named below are prepended to their PATH.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; refpkg_align.py gets both hmmer and infernal.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin:"
                            (assoc-ref inputs "infernal") "/bin")))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               ;; hrefpkg_query.py gets hmmer only.
               (let ((path (string-append
                            (assoc-ref inputs "hmmer") "/bin")))
                 (wrap-program (string-append bin "/hrefpkg_query.py")
                   `("PATH" ":" prefix (,path)))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
14421
(define-public python2-checkm-genome
  ;; CheckM, fetched from PyPI and built against Python 2.
  (package
    (name "python2-checkm-genome")
    (version "1.0.13")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "checkm-genome" version))
       (sha256
        (base32
         "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:tests? #f))                    ; some tests are interactive
    ;; The labels say "python-..." but the packages are the Python 2
    ;; variants.
    (propagated-inputs
     `(("python-dendropy" ,python2-dendropy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://pypi.org/project/Checkm/")
    (synopsis "Assess the quality of putative genome bins")
    (description
     "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
    (license license:gpl3+)))
14457
(define-public umi-tools
  ;; UMI-tools, fetched from PyPI (project name "umi_tools") and built with
  ;; the Python build system.
  (package
    (name "umi-tools")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "umi_tools" version))
       (sha256
        (base32
         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
    (build-system python-build-system)
    (inputs
     `(("python-pandas" ,python-pandas)
       ("python-future" ,python-future)
       ("python-scipy" ,python-scipy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-regex" ,python-regex)
       ("python-pysam" ,python-pysam)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/CGATOxford/UMI-tools")
    ;; "UMI" is expanded as "Unique Molecular Identifiers" in the
    ;; description; the synopsis uses the same term.
    (synopsis "Tools for analyzing unique molecular identifiers")
    (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
    (license license:expat)))
14490
(define-public ataqv
  ;; ataqv, fetched from the upstream git tag named after the version and
  ;; built with plain "make" (the 'configure phase is deleted).
  (package
    (name "ataqv")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ParkerLab/ataqv")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ;; Install into the "out" output and point the Makefile at the
       ;; "boost" and "htslib" inputs.
       (list (string-append "prefix=" (assoc-ref %outputs "out"))
             (string-append "BOOST_ROOT="
                            (assoc-ref %build-inputs "boost"))
             (string-append "HTSLIB_ROOT="
                            (assoc-ref %build-inputs "htslib")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("lcov" ,lcov)))
    (home-page "https://github.com/ParkerLab/ataqv")
    (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
    (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
    (license license:gpl3+)))
14531
(define-public r-psiplot
  ;; PSIplot, fetched from the upstream "v<version>" git tag and built with
  ;; the R build system.
  (package
    (name "r-psiplot")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kcha/psiplot")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
    (build-system r-build-system)
    ;; R packages installed alongside psiplot.
    (propagated-inputs
     `(("r-mass" ,r-mass)
       ("r-dplyr" ,r-dplyr)
       ("r-tidyr" ,r-tidyr)
       ("r-purrr" ,r-purrr)
       ("r-readr" ,r-readr)
       ("r-magrittr" ,r-magrittr)
       ("r-ggplot2" ,r-ggplot2)))
    (home-page "https://github.com/kcha/psiplot")
    (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
    (description
     "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
    (license license:expat)))
14562
(define-public python-ont-fast5-api
  ;; ont_fast5_api, fetched from the upstream "release_<version>" git tag
  ;; and built with the Python build system.
  (package
    (name "python-ont-fast5-api")
    (version "1.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nanoporetech/ont_fast5_api")
             (commit (string-append "release_" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "03cbq4zbbwhll8ml2m9k8sa31mirsvcbjkrq1yna0kkzz9fad5fm"))))
    (build-system python-build-system)
    ;; Python libraries installed alongside the package.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)
       ("python-h5py" ,python-h5py)
       ("python-progressbar33" ,python-progressbar33)))
    (home-page "https://github.com/nanoporetech/ont_fast5_api")
    (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
    (description
     "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
    (license license:mpl2.0)))
14591
(define-public tbsp
  ;; Packaged from a fixed git commit; the version string is derived from
  ;; "1.0.0", the revision and the commit via `git-version'.
  (let ((commit "ec8fff4410cfb13a677dbbb95cbbc60217e64907")
        (revision "1"))
    (package
      (name "tbsp")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/phoenixding/tbsp")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "025ym14x8gbd6hb55lsinqj6f5qzw36i10klgs7ldzxxd7s39ki1"))))
      (build-system python-build-system)
      (arguments '(#:tests? #f))        ; no tests included
      (inputs
       `(("python-matplotlib" ,python-matplotlib)
         ("python-networkx" ,python-networkx)
         ("python-numpy" ,python-numpy)
         ("python-pybigwig" ,python-pybigwig)
         ("python-biopython" ,python-biopython)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)))
      (home-page "https://github.com/phoenixding/tbsp/")
      (synopsis "SNP-based trajectory inference")
      (description
       "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
      (license license:expat))))
14629
(define-public tabixpp
  ;; C++ wrapper around tabix.  The "htslib" directory is removed from the
  ;; checkout and the "htslib" input is used instead; besides the
  ;; "tabix++" program, a shared and a static library plus a pkg-config
  ;; file are produced by the custom phases below.
  (package
    (name "tabixpp")
    (version "1.1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/tabixpp")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  (delete-file-recursively "htslib") #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; The build phase needs overriding the location of htslib.
         (replace 'build
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-ref (assoc-ref inputs "htslib")))
               (invoke "make"
                       (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
                       (string-append "INCLUDES= -I" htslib-ref "/include/htslib")
                       "HTS_HEADERS="   ; No need to check for headers here.
                       ;; Library search path: htslib's lib/ directory (it
                       ;; used to point at include/, which holds headers
                       ;; only).
                       (string-append "LIBPATH=-L. -L" htslib-ref "/lib"))
               ;; Additionally build a shared and a static library from
               ;; the object file produced by make.
               (invoke "g++" "-shared" "-o" "libtabixpp.so" "tabix.o" "-lhts")
               (invoke "ar" "rcs" "libtabixpp.a" "tabix.o"))))
         ;; Install the program, both libraries and the header, then write
         ;; a pkg-config file describing the library.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (lib (string-append out "/lib"))
                    (bin (string-append out "/bin")))
               (install-file "tabix++" bin)
               (install-file "libtabixpp.so" lib)
               (install-file "libtabixpp.a" lib)
               (install-file "tabix.hpp" (string-append out "/include"))
               (mkdir-p (string-append lib "/pkgconfig"))
               (with-output-to-file (string-append lib "/pkgconfig/tabixpp.pc")
                 (lambda _
                   ;; "~@" followed by a newline emits the newline and
                   ;; skips the indentation of the next line.
                   (format #t "prefix=~a~@
                           exec_prefix=${prefix}~@
                           libdir=${exec_prefix}/lib~@
                           includedir=${prefix}/include~@
                           ~@
                           ~@
                           Name: libtabixpp~@
                           Version: ~a~@
                           Description: C++ wrapper around tabix project~@
                           Libs: -L${libdir} -ltabixpp~@
                           Cflags: -I${includedir}~%"
                           out ,version)))
               #t))))))
    (home-page "https://github.com/ekg/tabixpp")
    (synopsis "C++ wrapper around tabix project")
    (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
    (license license:expat)))
14696
;; Smith-Waterman alignment program and libraries, packaged from a pinned
;; upstream commit.
(define-public smithwaterman
  (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
    (package
      (name "smithwaterman")
      (version (git-version "0.0.0" "2" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/smithwaterman/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:make-flags '("libsw.a" "all")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Add -fPIC to the compile commands in the Makefile; the
           ;; resulting objects are linked into a shared library by the
           ;; 'build-dynamic phase.
           (add-after 'unpack 'patch-source
             (lambda _
               (substitute* "Makefile"
                 (("-c ") "-c -fPIC "))
               #t))
           ;; Link the object files left behind by make into
           ;; libsmithwaterman.so.
           (add-after 'build 'build-dynamic
             (lambda _
               (let ((objects '("smithwaterman.o" "SmithWatermanGotoh.o"
                                "disorder.o" "BandedSmithWaterman.o"
                                "LeftAlign.o" "Repeats.o" "IndelAllele.o")))
                 (apply invoke "g++"
                        "-shared" "-o" "libsmithwaterman.so"
                        objects))))
           ;; Install the executable, every header found in the tree, both
           ;; libraries, and a generated pkg-config file.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin"))
                      (libdir (string-append prefix "/lib"))
                      (headers (string-append prefix "/include/smithwaterman"))
                      (pcdir (string-append libdir "/pkgconfig")))
                 (install-file "smithwaterman" bindir)
                 (for-each (lambda (header)
                             (install-file header headers))
                           (find-files "." "\\.h$"))
                 (install-file "libsmithwaterman.so" libdir)
                 (install-file "libsw.a" libdir)
                 (mkdir-p pcdir)
                 (with-output-to-file (string-append pcdir "/smithwaterman.pc")
                   (lambda _
                     (format #t "prefix=~a~@
                             exec_prefix=${prefix}~@
                             libdir=${exec_prefix}/lib~@
                             includedir=${prefix}/include/smithwaterman~@
                             ~@
                             ~@
                             Name: smithwaterman~@
                             Version: ~a~@
                             Description: smith-waterman-gotoh alignment algorithm~@
                             Libs: -L${libdir} -lsmithwaterman~@
                             Cflags: -I${includedir}~%"
                             prefix ,version))))
               #t)))))
      (home-page "https://github.com/ekg/smithwaterman")
      (synopsis "Implementation of the Smith-Waterman algorithm")
      (description "Implementation of the Smith-Waterman algorithm.")
      ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
      (license (list license:gpl2 license:expat)))))
14762
;; Multiset combination/permutation generators: two executables plus their
;; headers.
(define-public multichoose
  (package
    (name "multichoose")
    (version "1.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/multichoose/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; Tests require node.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Copy the two programs to bin/ and the two headers to include/.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (install-all
                     (lambda (files directory)
                       (for-each (lambda (file)
                                   (install-file file directory))
                                 files))))
               ;; TODO: There are Python modules for these programs too.
               (install-all '("multichoose" "multipermute")
                            (string-append prefix "/bin"))
               (install-all '("multichoose.h" "multipermute.h")
                            (string-append prefix "/include")))
             #t)))))
    (home-page "https://github.com/ekg/multichoose")
    (synopsis "Efficient loopless multiset combination generation algorithm")
    (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
    (license license:expat)))
14800
;; Self-organizing map code, packaged from a pinned upstream commit; only
;; the "fsom" file is installed.
(define-public fsom
  (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
    (package
      (name "fsom")
      (version (git-version "0.0.0" "1" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/fsom/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Copy the "fsom" file produced by the build into bin/.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "fsom"
                             (string-append (assoc-ref outputs "out") "/bin"))
               #t)))))
      (home-page "https://github.com/ekg/fsom")
      (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
      (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
      (license license:gpl3))))
14830
;; FASTA indexing and extraction tool, additionally built as a shared
;; library with a generated pkg-config file.
(define-public fastahack
  (package
    (name "fastahack")
    (version "1.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/fastahack/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Add -fPIC to the compile commands in the Makefile; the resulting
         ;; objects are linked into a shared library by 'build-dynamic.
         (add-after 'unpack 'patch-source
           (lambda _
             (substitute* "Makefile"
               (("-c ") "-c -fPIC "))
             #t))
         ;; Link the object files left behind by make into libfastahack.so.
         (add-after 'build 'build-dynamic
           (lambda _
             (let ((objects '("Fasta.o" "FastaHack.o" "split.o" "disorder.o")))
               (apply invoke "g++"
                      "-shared" "-o" "libfastahack.so"
                      objects))))
         ;; Install the headers, the executable, the shared library, and a
         ;; generated pkg-config file.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (libdir (string-append prefix "/lib"))
                    (bindir (string-append prefix "/bin"))
                    (headers (string-append prefix "/include/fastahack"))
                    (pcdir (string-append libdir "/pkgconfig")))
               (mkdir-p headers)
               (for-each (lambda (header)
                           (install-file header headers))
                         (find-files "." "\\.h$"))
               (install-file "fastahack" bindir)
               (install-file "libfastahack.so" libdir)
               (mkdir-p pcdir)
               (with-output-to-file (string-append pcdir "/fastahack.pc")
                 (lambda _
                   (format #t "prefix=~a~@
                           exec_prefix=${prefix}~@
                           libdir=${exec_prefix}/lib~@
                           includedir=${prefix}/include/fastahack~@
                           ~@
                           ~@
                           Name: fastahack~@
                           Version: ~a~@
                           Description: Indexing and sequence extraction from FASTA files~@
                           Libs: -L${libdir} -lfastahack~@
                           Cflags: -I${includedir}~%"
                           prefix ,version))))
             #t)))))
    (home-page "https://github.com/ekg/fastahack")
    (synopsis "Indexing and sequence extraction from FASTA files")
    (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
    (license (list license:expat license:gpl2))))
14895
;; VCF parsing library and tools.  The source snippet rewires the build to
;; use the packaged fastahack and smithwaterman through pkg-config and
;; deletes the bundled directories; the sources of the remaining submodules
;; are unpacked from native inputs at build time.
(define-public vcflib
  (package
    (name "vcflib")
    (version "1.0.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/vcflib/vcflib")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1k1z3876kbzifj1sqfzsf3lgb4rw779hvkg6ryxbyq5bc2paj9kh"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop every CMakeLists.txt line mentioning fastahack or
           ;; smithwaterman, then add pkg-config lookups for both next to
           ;; the existing TABIXPP lookup and link against the results.
           (substitute* "CMakeLists.txt"
             ((".*fastahack.*") "")
             ((".*smithwaterman.*") "")
             (("(pkg_check_modules\\(TABIXPP)" text)
              (string-append
               "pkg_check_modules(FASTAHACK REQUIRED fastahack)\n"
               "pkg_check_modules(SMITHWATERMAN REQUIRED smithwaterman)\n"
               text))
             (("\\$\\{TABIXPP_LIBRARIES\\}" text)
              (string-append "${FASTAHACK_LIBRARIES} "
                             "${SMITHWATERMAN_LIBRARIES} "
                             text)))
           ;; Point the includes at the headers installed by the
           ;; smithwaterman and fastahack packages.
           (substitute* (find-files "." "\\.(h|c)(pp)?$")
             (("\"SmithWatermanGotoh.h\"") "<smithwaterman/SmithWatermanGotoh.h>")
             (("\"convert.h\"") "<smithwaterman/convert.h>")
             (("\"disorder.h\"") "<smithwaterman/disorder.h>")
             (("Fasta.h") "fastahack/Fasta.h"))
           (for-each delete-file-recursively
                     '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
                       "libVCFH" "multichoose" "smithwaterman"))
           #t))))
    (build-system cmake-build-system)
    (inputs
     `(("bzip2" ,bzip2)
       ("htslib" ,htslib)
       ("fastahack" ,fastahack)
       ("perl" ,perl)
       ("python" ,python)
       ("smithwaterman" ,smithwaterman)
       ("tabixpp" ,tabixpp)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ;; Submodules.
       ;; This package builds against the .o files so we need to extract the source.
       ("filevercmp-src" ,(package-source filevercmp))
       ("fsom-src" ,(package-source fsom))
       ("intervaltree-src" ,(package-source intervaltree))
       ("multichoose-src" ,(package-source multichoose))))
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         ;; Build libvcflib as a shared library instead of a static one.
         (add-after 'unpack 'build-shared-library
           (lambda _
             (substitute* "CMakeLists.txt"
               (("vcflib STATIC") "vcflib SHARED"))
             (substitute* "test/Makefile"
               (("libvcflib.a") "libvcflib.so"))
             #t))
         ;; Populate the submodule directories deleted by the source
         ;; snippet from the corresponding "<name>-src" inputs.
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((unpack (lambda (source target)
                             (mkdir target)
                             (with-directory-excursion target
                               ;; A source is either a checkout directory
                               ;; or a tarball.
                               (if (file-is-directory? (assoc-ref inputs source))
                                   (copy-recursively (assoc-ref inputs source) ".")
                                   (invoke "tar" "xvf"
                                           (assoc-ref inputs source)
                                           "--strip-components=1"))))))
               ;; Unpack every submodule unconditionally.  Chaining the
               ;; calls with 'and' would silently skip the remaining ones
               ;; if a call returned #f, while the phase still succeeded;
               ;; 'invoke' already raises an exception on failure.
               (for-each (lambda (submodule)
                           (unpack (string-append submodule "-src") submodule))
                         '("filevercmp" "fsom" "intervaltree" "multichoose")))
             #t))
         ;; This pkg-config file is provided by other distributions.
         (add-after 'install 'install-pkg-config-file
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (pkgconfig (string-append out "/lib/pkgconfig")))
               (mkdir-p pkgconfig)
               (with-output-to-file (string-append pkgconfig "/vcflib.pc")
                 (lambda _
                   (format #t "prefix=~a~@
                           exec_prefix=${prefix}~@
                           libdir=${exec_prefix}/lib~@
                           includedir=${prefix}/include~@
                           ~@
                           Name: vcflib~@
                           Version: ~a~@
                           Requires: smithwaterman, fastahack, tabixpp~@
                           Description: C++ library for parsing and manipulating VCF files~@
                           Libs: -L${libdir} -lvcflib~@
                           Cflags: -I${includedir}~%"
                           out ,version)))
               #t))))))
    (home-page "https://github.com/vcflib/vcflib/")
    (synopsis "Library for parsing and manipulating VCF files")
    (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
    (license license:expat)))
15008
;; Bayesian variant detector, built with Meson against packaged
;; dependencies; the test scripts are patched to use the bash-tap input.
(define-public freebayes
  (package
    (name "freebayes")
    (version "1.3.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/freebayes/freebayes")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0myz3giad7jqp6ricdfnig9ymlcps2h67mlivadvx97ngagm85z8"))
              (patches (search-patches "freebayes-devendor-deps.patch"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "contrib/htslib")
                  #t))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-source
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((tap-root (string-append (assoc-ref inputs "bash-tap")
                                             "/bin"))
                    (bootstrap (string-append tap-root
                                              "/bash-tap-bootstrap")))
               ;; Make the test scripts load bash-tap from the input.
               (substitute* (find-files "test/t")
                 (("BASH_TAP_ROOT=bash-tap")
                  (string-append "BASH_TAP_ROOT=" tap-root))
                 (("bash-tap/bash-tap-bootstrap") bootstrap)
                 (("source.*bash-tap-bootstrap")
                  (string-append "source " bootstrap)))
               (substitute* "meson.build"
                 ;; Some inputs aren't actually needed.
                 ((".*bamtools/src.*") "")
                 ((".*multichoose.*") ""))
               (substitute* '("src/BedReader.cpp"
                              "src/BedReader.h")
                 (("../intervaltree/IntervalTree.h") "IntervalTree.h"))
               #t)))
         ;; Copy the test-simple-bash checkout to where the tests look for
         ;; it.
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((destination "test/test-simple-bash"))
               (mkdir-p destination)
               (copy-recursively (assoc-ref inputs "test-simple-bash-src")
                                 destination))
             #t))
         ;; The slow tests take longer than the specified timeout.
         ,@(if (member (%current-system)
                       '("armhf-linux" "aarch64-linux"))
               '((replace 'check
                   (lambda* (#:key tests? #:allow-other-keys)
                     (when tests?
                       (invoke "meson" "test" "--timeout-multiplier" "5"))
                     #t)))
               '()))))
    (inputs
     `(("fastahack" ,fastahack)
       ("htslib" ,htslib)
       ("smithwaterman" ,smithwaterman)
       ("tabixpp" ,tabixpp)
       ("vcflib" ,vcflib)
       ("zlib" ,zlib)))
    (native-inputs
     `(("bash-tap" ,bash-tap)
       ("bc" ,bc)
       ("grep" ,grep)                   ; Built with perl support.
       ("parallel" ,parallel)
       ("perl" ,perl)
       ("pkg-config" ,pkg-config)
       ("samtools" ,samtools)
       ("simde" ,simde)
       ;; This submodule is needed to run the tests.
       ("test-simple-bash-src"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/ingydotnet/test-simple-bash/")
                 (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
           (file-name "test-simple-bash-src-checkout")
           (sha256
            (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
    (home-page "https://github.com/freebayes/freebayes")
    (synopsis "Haplotype-based variant detector")
    (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
    (license license:expat)))
15098
;; Duplicate-marking tool for SAM files; the build yields a single
;; "samblaster" executable that is copied to bin/.
(define-public samblaster
  (package
    (name "samblaster")
    (version "0.1.24")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/GregoryFaust/samblaster")
                    ;; Upstream tags have the form "v.<version>".
                    (commit (string-append "v." version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "samblaster" bindir))
             #t)))))
    (home-page "https://github.com/GregoryFaust/samblaster")
    (synopsis "Mark duplicates in paired-end SAM files")
    (description "Samblaster is a fast and flexible program for marking
duplicates in read-id grouped paired-end SAM files. It can also optionally
output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file. When marking
duplicates, samblaster will require approximately 20MB of memory per 1M read
pairs.")
    (license license:expat)))
15132
;; R package for RNA velocity estimation, packaged from a pinned upstream
;; commit.
(define-public r-velocyto
  (let* ((commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350")
         (revision "1"))
    (package
      (name "r-velocyto")
      (version (git-version "0.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/velocyto-team/velocyto.R")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
      (build-system r-build-system)
      (inputs
       `(("boost" ,boost)))
      (propagated-inputs
       `(("r-hdf5r" ,r-hdf5r)
         ("r-mass" ,r-mass)
         ("r-mgcv" ,r-mgcv)
         ("r-pcamethods" ,r-pcamethods)
         ("r-rcpp" ,r-rcpp)
         ("r-rcpparmadillo" ,r-rcpparmadillo)
         ;; Suggested packages
         ("r-rtsne" ,r-rtsne)
         ("r-cluster" ,r-cluster)
         ("r-abind" ,r-abind)
         ("r-h5" ,r-h5)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-rsamtools" ,r-rsamtools)
         ("r-edger" ,r-edger)
         ("r-igraph" ,r-igraph)))
      (home-page "https://velocyto.org")
      (synopsis "RNA velocity estimation in R")
      (description
       "This package provides basic routines for estimation of gene-specific
transcriptional derivatives and visualization of the resulting velocity
patterns.")
      (license license:gpl3))))
15176
;; Methylation extractor for BS-seq alignments.  There is no configure
;; script; the 'configure phase patches the Makefile instead.
(define-public methyldackel
  (package
    (name "methyldackel")
    (version "0.5.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/dpryan79/MethylDackel")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1sfhf2ap75qxpnmy1ifgmxqs18rq8mah9mcgkby73vc6h0sw99ws"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (let ((bindir (string-append (assoc-ref %outputs "out") "/bin/")))
         (list "CC=gcc"
               (string-append "prefix=" bindir)))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               ;; Also link against libBigWig.
               (("-lhts ") "-lhts -lBigWig ")
               ;; Create the installation directory before installing
               ;; into it.
               (("install MethylDackel \\$\\(prefix\\)" install-command)
                (string-append "install -d $(prefix); " install-command)))
             #t)))))
    (inputs
     `(("curl" ,curl)                   ; XXX: needed by libbigwig
       ("htslib" ,htslib-1.9)
       ("libbigwig" ,libbigwig)
       ("zlib" ,zlib)))
    ;; Needed for tests
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/dpryan79/MethylDackel")
    (synopsis "Universal methylation extractor for BS-seq experiments")
    (description
     "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
file containing some form of BS-seq alignments and extract per-base
methylation metrics from them. MethylDackel requires an indexed fasta file
containing the reference genome as well.")
    ;; See https://github.com/dpryan79/MethylDackel/issues/85
    (license license:expat)))
15223
;; This package bundles PCRE 8.02 and cannot be built with the current
;; version.
;;
;; There is no configure script: the 'configure phase patches the Makefiles
;; to use the CLAPACK input, and the 'check phase runs only the msa_view
;; tests.
(define-public phast
  (package
    (name "phast")
    (version "1.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/CshlSiepelLab/phast")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "10lpbllvny923jjbbyrpxahhd1m5h7sbj9gx7rd123rg10mlidki"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list "CC=gcc"
             (string-append "DESTDIR=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Fix syntax
             ;; NOTE(review): as written here the pattern and the
             ;; replacement are both a single space, which makes this
             ;; substitution a no-op.  Presumably the exact whitespace in
             ;; these two literals is significant (e.g. spaces vs. a tab)
             ;; -- confirm against the upstream file.
             (substitute* "test/Makefile"
               ((" ") " "))
             (substitute* "Makefile"
               (("CLAPACKPATH=/usr/lib")
                (string-append "CLAPACKPATH="
                               (assoc-ref inputs "clapack") "/lib")))
             ;; Renaming the libraries is not necessary with our version of
             ;; CLAPACK.
             (substitute* "src/lib/Makefile"
               (("ifdef CLAPACKPATH") "ifdef UNNECESSARY"))
             (substitute* "src/make-include.mk"
               (("-lblaswr") "-lblas")
               (("-ltmg") "-ltmglib")
               (("liblapack.a") "liblapack.so")
               (("libblas.a") "libblas.so")
               (("libf2c.a") "libf2c.so"))
             (substitute* "src/Makefile"
               (("/opt") "/share")
               (("/usr/") "/"))
             #t))
         (replace 'check
           ;; Honour #:tests? so that the test suite can be switched off
           ;; like in any package using the standard 'check phase.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Put the freshly built programs on PATH for the tests.
               (setenv "PATH"
                       (string-append (getcwd) "/bin:" (getenv "PATH")))
               ;; Disable broken test
               (substitute* "test/Makefile"
                 ((".*if.*hmrc_summary" m) (string-append "#" m)))
               ;; Only run the msa_view tests because the others fail for
               ;; unknown reasons.
               (invoke "make" "-C" "test" "msa_view"))
             #t)))))
    (inputs
     `(("clapack" ,clapack)))
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://compgen.cshl.edu/phast/")
    (synopsis "Phylogenetic analysis with space/time models")
    (description
     "Phylogenetic Analysis with Space/Time models (PHAST) is a collection of
command-line programs and supporting libraries for comparative and
evolutionary genomics. Best known as the search engine behind the
Conservation tracks in the University of California, Santa Cruz (UCSC) Genome
Browser, PHAST also includes several tools for phylogenetic modeling,
functional element identification, as well as utilities for manipulating
alignments, trees and genomic annotations.")
    (license license:bsd-3)))
15294
;; Python library for GFF/GTF files, packaged from a pinned upstream commit.
(define-public python-gffutils
  ;; The latest release is more than a year older than the latest commit.
  (let ((commit "4034c54600813b1402945e12faa91b3a53162cf1")
        (revision "1"))
    (package
      (name "python-gffutils")
      (version (git-version "0.9" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/daler/gffutils")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1rwafjdnbir5wnk0ap06ww4lra3p5frhy7mfs03rlldgfnwxymsn"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (replace 'check
             ;; Honour #:tests? so that the test suite can be switched off
             ;; like in any package using the standard 'check phase.
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 ;; Tests need to access the HOME directory
                 (setenv "HOME" "/tmp")
                 ;; Run every test except those tagged "slow".
                 (invoke "nosetests" "-a" "!slow"))
               #t))
           ;; Make the .gz files in the source tree writable.
           (add-after 'unpack 'make-gz-files-writable
             (lambda _
               (for-each make-file-writable
                         (find-files "." "\\.gz"))
               #t)))))
      (propagated-inputs
       `(("python-argcomplete" ,python-argcomplete)
         ("python-argh" ,python-argh)
         ("python-biopython" ,python-biopython)
         ("python-pybedtools" ,python-pybedtools)
         ("python-pyfaidx" ,python-pyfaidx)
         ("python-simplejson" ,python-simplejson)
         ("python-six" ,python-six)))
      (native-inputs
       `(("python-nose" ,python-nose)))
      (home-page "https://github.com/daler/gffutils")
      (synopsis "Tool for manipulation of GFF and GTF files")
      (description
       "python-gffutils is a Python package for working with and manipulating
the GFF and GTF format files typically used for genomic annotations. The
files are loaded into a SQLite database, allowing much more complex
manipulation of hierarchical features (e.g., genes, transcripts, and exons)
than is possible with plain-text methods alone.")
      (license license:expat))))
15345
;; Java NGS aligner, built with Ant into InDelFixer.jar.
(define-public indelfixer
  (package
    (name "indelfixer")
    (version "1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/cbg-ethz/InDelFixer/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "10ak05x8i1bx2p7rriv2rglqg1wr7c8wrhjrqlq1wm7ka99w8i79"))))
    (build-system ant-build-system)
    ;; Name of the resulting jar and the locations of the main and test
    ;; sources inside the checkout.
    (arguments
     `(#:jar-name "InDelFixer.jar"
       #:source-dir "src/main/java"
       #:test-dir "src/test"))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (inputs
     `(("java-commons-lang2" ,java-commons-lang)
       ("java-args4j" ,java-args4j)))
    (home-page "https://github.com/cbg-ethz/InDelFixer/")
    (synopsis "Iterative and sensitive NGS sequence aligner")
    (description "InDelFixer is a sensitive aligner for 454, Illumina and
PacBio data, employing a full Smith-Waterman alignment against a reference.
This Java command line application aligns Next-Generation Sequencing (NGS) and
third-generation reads to a set of reference sequences, by a prior fast k-mer
matching and removes indels, causing frame shifts. In addition, only a
specific region can be considered. An iterative refinement of the alignment
can be performed, by alignment against the consensus sequence with wobbles.
The output is in SAM format.")
    (license license:gpl3+)))
15380
;; SBML reading/writing library, built with CMake against the libxml2
;; input.
(define-public libsbml
  (package
    (name "libsbml")
    (version "5.18.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/sbml/libsbml/"
                                  version "/stable/libSBML-"
                                  version "-core-src.tar.gz"))
              (sha256
               (base32
                "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
    (build-system cmake-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags
       ;; Enable the check-based tests and tell CMake where the libxml2
       ;; library and headers are.
       (let ((libxml2 (assoc-ref %build-inputs "libxml2")))
         (list "-DWITH_CHECK=ON"
               (string-append "-DLIBXML_LIBRARY="
                              libxml2 "/lib/libxml2.so")
               (string-append "-DLIBXML_INCLUDE_DIR="
                              libxml2 "/include/libxml2")))))
    (native-inputs
     `(("check" ,check-0.14)
       ("swig" ,swig)))
    (propagated-inputs
     `(("libxml2" ,libxml2)))
    (home-page "http://sbml.org/Software/libSBML")
    (synopsis "Process SBML files and data streams")
    (description "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
Markup Language} (SBML) is an interchange format for computer models of
biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
    (license license:lgpl2.1+)))