Merge branch 'core-updates'
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
CommitLineData
4e10a221 1;;; GNU Guix --- Functional package management for GNU
0047d26a 2;;; Copyright © 2014, 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
9b9b7ffd 3;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
a5002ae7
AE
4;;; Copyright © 2015 Pjotr Prins <pjotr.guix@thebird.nl>
5;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
4e10a221
RW
6;;;
7;;; This file is part of GNU Guix.
8;;;
9;;; GNU Guix is free software; you can redistribute it and/or modify it
10;;; under the terms of the GNU General Public License as published by
11;;; the Free Software Foundation; either version 3 of the License, or (at
12;;; your option) any later version.
13;;;
14;;; GNU Guix is distributed in the hope that it will be useful, but
15;;; WITHOUT ANY WARRANTY; without even the implied warranty of
16;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17;;; GNU General Public License for more details.
18;;;
19;;; You should have received a copy of the GNU General Public License
20;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
21
22(define-module (gnu packages bioinformatics)
23 #:use-module ((guix licenses) #:prefix license:)
24 #:use-module (guix packages)
8e913213 25 #:use-module (guix utils)
4e10a221 26 #:use-module (guix download)
2c16316e 27 #:use-module (guix git-download)
4e10a221 28 #:use-module (guix build-system gnu)
d7678942 29 #:use-module (guix build-system cmake)
365c8153 30 #:use-module (guix build-system perl)
8622a072 31 #:use-module (guix build-system python)
a5002ae7 32 #:use-module (guix build-system r)
9c38b540 33 #:use-module (guix build-system ruby)
d3517eda 34 #:use-module (guix build-system trivial)
4e10a221 35 #:use-module (gnu packages)
a2950fa4 36 #:use-module (gnu packages autotools)
684bf7c7 37 #:use-module (gnu packages algebra)
d3517eda 38 #:use-module (gnu packages base)
e4e5a4d8 39 #:use-module (gnu packages boost)
4e10a221 40 #:use-module (gnu packages compression)
82c370de 41 #:use-module (gnu packages cpio)
75dd2424 42 #:use-module (gnu packages file)
02f35bb5 43 #:use-module (gnu packages gawk)
2409f37f 44 #:use-module (gnu packages gcc)
15a3c3d4 45 #:use-module (gnu packages java)
51c64999 46 #:use-module (gnu packages linux)
36742f43 47 #:use-module (gnu packages machine-learning)
c833ab55 48 #:use-module (gnu packages maths)
6c2b26e2 49 #:use-module (gnu packages mpi)
4e10a221 50 #:use-module (gnu packages ncurses)
81f3e0c1 51 #:use-module (gnu packages pcre)
4e10a221
RW
52 #:use-module (gnu packages perl)
53 #:use-module (gnu packages pkg-config)
bfe3c685 54 #:use-module (gnu packages popt)
e4e5a4d8 55 #:use-module (gnu packages protobuf)
346a829a 56 #:use-module (gnu packages python)
9c38b540 57 #:use-module (gnu packages ruby)
c833ab55 58 #:use-module (gnu packages statistics)
d7678942 59 #:use-module (gnu packages tbb)
2127cedb 60 #:use-module (gnu packages textutils)
43c565d2 61 #:use-module (gnu packages time)
a2950fa4 62 #:use-module (gnu packages tls)
ce7155d5 63 #:use-module (gnu packages vim)
365c8153 64 #:use-module (gnu packages web)
c833ab55 65 #:use-module (gnu packages xml)
f7283db3
RW
66 #:use-module (gnu packages zip)
67 #:use-module (srfi srfi-1))
4e10a221 68
8dc797fa
BW
;; ARAGORN is built by compiling one versioned C file directly with gcc;
;; the resulting binary and the man page are then installed by hand.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.36")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "1dg7jlz1qpqy88igjxd6ncs11ccsirb36qv1z01a0np4i4jh61mb"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; The build below is a direct compiler call, so the configure
         ;; phase is dropped.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The source file name embeds the package version.
             (let ((source-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc"
                               "-O3"
                               "-ffast-math"
                               "-finline-functions"
                               "-o"
                               "aragorn"
                               source-file)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (mandir (string-append prefix "/share/man/man1")))
               ;; 'install-file' creates the target directory as needed.
               (install-file "aragorn" bindir)
               (install-file "aragorn.1" mandir)
               #t))))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
116
9794180d
RW
;; BamTools: C++ API and command-line toolkit for BAM files, built with CMake.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the output's "lib/bamtools" directory on the linker's
             ;; run-time search path via LDFLAGS.
             (let* ((out (assoc-ref outputs "out"))
                    (rpath (string-append out "/lib/bamtools")))
               (setenv "LDFLAGS"
                       (string-append "-Wl,-rpath=" rpath))))))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
149
8dd4ff11
RW
;; BEDOPS: tools for set operations and statistics on genomic features.
;; The bundled third-party tarballs are unpacked in a dedicated phase so
;; that their contents go through the usual shebang patching.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; The phase result is the unpack status: if any 'tar' call
             ;; fails, the phase returns #f and the Makefiles are not
             ;; touched.
             (and
              ;; Unpack the tarballs to benefit from shebang patching.
              (with-directory-excursion "third-party"
                (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                     (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                     (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
              (begin
                ;; Disable unpacking of tarballs in Makefile.
                (substitute* "system.mk/Makefile.linux"
                  (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                  (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                (substitute* "third-party/zlib-1.2.7/Makefile.in"
                  (("^SHELL=.*$") "SHELL=bash\n"))
                #t))))
         ;; The standard configure phase is not used for this package.
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
207
81de5647
RW
;; bedtools: genome arithmetic utilities.  The standard configure phase is
;; removed and the programs under "bin" are installed by a custom phase.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.25.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1ywcy3yfwzhl905b51l0ffjia55h75vv3mw5xkvib04pp6pj548m"))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (inputs `(("samtools" ,samtools)
              ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every file found under the build tree's "bin"
             ;; directory into the output's "bin" directory.
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin/"))
                    (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
246
a2fb1492
RW
;; pybedtools: Python 2 wrapper around the BEDtools programs.  bedtools and
;; samtools are propagated inputs.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    ;; The last sentence previously read "from with Python"; corrected to
    ;; "from within Python".
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
279
f7283db3
RW
;; BioPerl with a minimal set of inputs.  The input list is bound outside
;; the package record so that the names of all transitive target inputs can
;; be computed on the host side and spliced into the build-side wrapper phase.
(define-public bioperl-minimal
  (let* ((inputs `(("perl-module-build" ,perl-module-build)
                   ("perl-data-stag" ,perl-data-stag)
                   ("perl-libwww" ,perl-libwww)
                   ("perl-uri" ,perl-uri)))
         ;; Names of every package reachable as a transitive target input of
         ;; the direct inputs above, without duplicates.
         (transitive-inputs
          (map (lambda (entry)
                 (package-name (cadr entry)))
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs cadr)
                            inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.6.924")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://cpan/authors/id/C/CJ/CJFIELDS/BioPerl-"
                             version ".tar.gz"))
         (sha256
          (base32
           "1l3npcvvvwjlhkna9dndpfv1hklhrgva013kw96m0n1wpd37ask1"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin/"))
                      (input-dirs (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',transitive-inputs))
                      (perl5lib (string-join
                                 (cons (string-append out "/lib/perl5/site_perl")
                                       input-dirs)
                                 ":")))
                 ;; Only the ".pl" scripts in "bin" are wrapped.
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files bindir "\\.pl$"))
                 #t))))))
      (inputs inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
340
85c37e29
RW
;; Biopython for the default Python.  All inputs use the default-Python
;; variants so that they match the interpreter used by the build system.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.66")
    (source (origin
              (method url-fetch)
              ;; use PyPi rather than biopython.org to ease updating
              (uri (pypi-uri "biopython" version))
              (sha256
               (base32
                "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     ;; This was previously 'python2-setuptools', which does not match the
     ;; interpreter this package is built with.
     `(("python-setuptools" ,python-setuptools)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
367
;; Python 2 variant of 'python-biopython'; the 'inputs' field is overridden
;; to name the Python 2 numpy explicitly.
(define-public python2-biopython
  (let ((base (package-with-python2 python-biopython)))
    (package
      (inherit base)
      (inputs
       `(("python2-numpy" ,python2-numpy))))))
372
82c370de
RW
;; NCBI BLAST+.  The bundled bzip2/zlib copies are removed in the source
;; snippet, hard-coded tool paths in the build system are rewritten, and the
;; result is split over the "out", "lib" and "include" outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.2.31")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
                    version "/ncbi-blast-" version "+-src.tar.gz"))
              (sha256
               (base32
                "19gq6as4k1jrgsd26158ads6h7v4jca3h4r5dzg1y0m6ya50x5ph"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled bzip2 and zlib
                  (for-each delete-file-recursively
                            '("c++/src/util/compress/bzip2"
                              "c++/src/util/compress/zlib"))
                  (substitute* "c++/src/util/compress/Makefile.in"
                    (("bzip2 zlib api") "api"))
                  ;; Remove useless msbuild directory
                  (delete-file-recursively
                   "c++/src/build-system/project_tree_builder/msbuild")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are three(!) tests for this massive library, and all fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           ;; All later phases operate from inside the "c++" subdirectory.
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Map a bare command name to its replacement text: a fixed
             ;; invocation for "date", the absolute path found by 'which'
             ;; otherwise, or #f (after a warning) when nothing is found.
             (define (which* cmd)
               (if (string=? cmd "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Files in which hard-coded tool paths are rewritten.
             (define files-with-hardcoded-paths
               (append '("src/build-system/configure.ac"
                         "src/build-system/configure"
                         "scripts/common/impl/if_diff.sh"
                         "scripts/common/impl/run_with_lock.sh"
                         "src/build-system/Makefile.configurables.real"
                         "src/build-system/Makefile.in.top"
                         "src/build-system/Makefile.meta.gmake=no"
                         "src/build-system/Makefile.meta.in"
                         "src/build-system/Makefile.meta_l"
                         "src/build-system/Makefile.meta_p"
                         "src/build-system/Makefile.meta_r"
                         "src/build-system/Makefile.mk.in"
                         "src/build-system/Makefile.requirements"
                         "src/build-system/Makefile.rules_with_autodep.in")
                       (find-files "scripts/common/check" "\\.sh$")))

             ;; Rewrite hardcoded paths to various tools
             (substitute* files-with-hardcoded-paths
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                ;; Keep the original text when no replacement is known.
                (or (which* cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (libdir (string-append (assoc-ref outputs "lib") "/lib"))
                    (includedir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++"))
                    (build-root (string-append (getcwd) "/build")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system* "./configure.orig"
                               (string-append "--with-build-root=" build-root)
                               (string-append "--prefix=" out)
                               (string-append "--libdir=" libdir)
                               (string-append "--includedir=" includedir)
                               (string-append "--with-bz2="
                                              (assoc-ref inputs "bzip2"))
                               (string-append "--with-z="
                                              (assoc-ref inputs "zlib"))
                               ;; Each library is built twice by default, once
                               ;; with "-static" in its name, and again
                               ;; without.
                               "--without-static"
                               "--with-dll"))))))))
    (outputs '("out"       ;  19 MB
               "lib"       ; 203 MB
               "include")) ;  32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
519
6c2b26e2
RW
;; BLESS: Bloom-filter-based error correction for sequencing reads.  Bundled
;; boost, pigz and zlib are removed from the source; the kmc and pigz binary
;; locations compiled into the program are rewritten to store paths.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, zlib, and .git directory
                  ;; FIXME: also remove bundled sources for google-sparsehash,
                  ;; murmurhash3, kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-build-bundled-pigz
           ;; The bundled pigz directory is deleted by the source snippet,
           ;; so the Makefile must not try to build it.
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; kmc is installed into this package's own output by the
             ;; 'install' phase below; pigz comes from the "pigz" input.
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) uses a single minimum-sized bloom filter and is a
correction tool for genomic reads produced by @dfn{Next-generation
sequencing} (NGS). BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
602
2c7ee167
RW
;; Bowtie 2 read aligner.  The source snippet pins BUILD_HOST and BUILD_TIME
;; for a deterministic build; the test suite is run through the Perl driver
;; script shipped with the sources.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.6")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1ssfvymxfrap6f9pf86s9bvsbqdgka4abr2r7j3mgr4w1l289m86"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           (lambda _
             ;; 'system*' returns the exit status as an integer, and every
             ;; integer (including a non-zero failure code) is true in
             ;; Scheme.  Convert it to a boolean so that a failing test run
             ;; actually fails the phase.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
656
94ce537e
RW
;; TopHat spliced read mapper.  The bundled SeqAn and samtools copies are
;; deleted in the source snippet; the build is then pointed at the
;; "samtools" and "seqan" inputs instead.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                    version ".tar.gz"))
              (sha256
               (base32
                "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
              (patches (list (search-patch "tophat-build-with-later-seqan.patch")))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled SeqAn and samtools
                  (delete-file-recursively "src/SeqAn-1.3")
                  (delete-file-recursively "src/samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop every reference to the bundled samtools build from the
             ;; Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Replace the name of the bundled program with the absolute
             ;; path of the samtools found in PATH.  The dots are escaped so
             ;; that only the literal version string matches, as in the
             ;; Makefile patterns above.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0\\.1\\.18") (which "samtools")))
             ;; Use the headers under the "samtools/" include prefix.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
723
9a8336d8
RW
;; BWA: Burrows-Wheeler aligner.  There is no "configure" script and no
;; "check" target; the binary, README and man page are installed by a custom
;; phase.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               ;; 'install-file' creates the target directory itself, so no
               ;; separate 'mkdir-p' calls are needed.
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               ;; Return an explicit success value rather than the
               ;; unspecified result of the last copy.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
773
ad641d53
RW
;; bx-python for Python 2.  The source snippet strips the "distribute"
;; bootstrap lines from setup.py; tests are disabled because the test data
;; are not included.
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; remove dependency on outdated "distribute" module
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f ;tests fail because test data are not included
       #:python ,python-2))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
808
810cff85
RW
;; CLIPper: peak caller for CLIP-seq data, built with Python 2.  The source
;; snippet removes the "setup_requires" line from setup.py.
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
       (modules '((guix build utils)))
       (snippet
        ;; remove unnecessary setup dependency
        '(substitute* "setup.py"
           (("setup_requires = .*") "")))))
    (build-system python-build-system)
    (arguments
     ;; only Python 2 is supported
     `(#:python ,python-2))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (native-inputs
     `(("python-mock" ,python2-mock)   ; for tests
       ("python-pytz" ,python2-pytz)   ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
847
36742f43
RW
;; COUGER: identifies co-factors in sets of genomic regions.  The zip
;; archive is installed by copying files; the "couger" script is patched and
;; wrapped so that it finds its modules at run time.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; The configure and build phases are skipped; installation is a
         ;; plain copy of the sources.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (bindir (string-append output "/bin")))
               (copy-recursively "src" (string-append output "/src"))
               (mkdir bindir)
               ;; Make the script append the output directory (which holds
               ;; "src") to Python's module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 output "\")\nfrom argparse")))
               (copy-file "couger" (string-append bindir "/couger")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Run 'couger' with the PYTHONPATH that is in effect at build
             ;; time prepended to the user's PYTHONPATH.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/couger"))
                   (pythonpath (getenv "PYTHONPATH")))
               (wrap-program script
                 `("PYTHONPATH" ":" prefix (,pythonpath))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
917
bfe3c685
RW
;; Clustal-Omega: multiple sequence aligner, built with the GNU build system
;; using its default phases.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.clustal.org/omega/clustal-omega-"
             version ".tar.gz"))
       (sha256
        (base32
         "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
941
191c7101
RW
;; CrossMap: converts genome coordinates and annotation files between
;; assemblies.  The bundled pysam is removed in favour of the packaged one.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
       ;; This patch has been sent upstream already and is available
       ;; for download from Sourceforge, but it has not been merged.
       (patches (list
                 (search-patch "crossmap-allow-system-pysam.patch")))
       (modules '((guix build utils)))
       ;; Remove the bundled copy of pysam.
       (snippet
        '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set the environment variable right after unpacking so that it is
         ;; in effect for all later phases.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             ;; Phases must return a true value; do not rely on the
             ;; unspecified return value of 'setenv'.
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
984
3a40a92c
RW
;; Cufflinks: transcriptome assembly and RNA-Seq expression analysis.  Built
;; against samtools 0.1, whose headers live under "samtools/" rather than the
;; "bam/" directory the sources ask for.
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-bam="
                            (assoc-ref %build-inputs "samtools")))
       #:make-flags
       (list
        ;; Eigen's headers sit in an "eigen3" subdirectory of its include
        ;; directory.
        (string-append "EIGEN_CPPFLAGS="
                       "-I" (assoc-ref %build-inputs "eigen")
                       "/include/eigen3/")
        ;; Link with the boost libraries that Cufflinks needs.
        (string-append "LDFLAGS="
                       (string-join '("-lboost_system"
                                      "-lboost_serialization"
                                      "-lboost_thread"))))
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the "bam/..." header references to the "samtools/..."
         ;; names in the m4 macro, configure script and hits.h.
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             (substitute* '("ax_bam.m4"
                            "configure"
                            "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (license license:boost1.0)))
1042
8e913213
RW
;; Cutadapt: removes adapter sequences from nucleotide sequencing reads.
;; The test suite is run against the installed package.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/marcelm/cutadapt/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests must be run after installation, so drop the standard
         ;; 'check' phase and add a new one after 'install'.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Append the output's site-packages directory to PYTHONPATH.
             ;; The "X.Y" part of the directory name is taken from the last
             ;; five characters of the "python" input's file name.
             ;; NOTE(review): this assumes that file name ends in a
             ;; five-character "X.Y.Z" version -- confirm when updating
             ;; Python.
             (setenv "PYTHONPATH"
                     (string-append
                      (getenv "PYTHONPATH")
                      ":" (assoc-ref outputs "out")
                      "/lib/python"
                      (string-take (string-take-right
                                    (assoc-ref inputs "python") 5) 3)
                      "/site-packages"))
             (zero? (system* "nosetests" "-P" "tests")))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1082
1921b1de
RW
;; deepTools: tools for normalizing and visualizing deep-sequencing data.
;; Built with Python 2; its Python dependencies are propagated.
(define-public deeptools
  (package
    (name "deeptools")
    (version "1.5.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/fidelram/deepTools/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1kaagygcbvjs9sxd9cqmskd02wcfp9imvb735r087w7hwqpvz6fs"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;used by the tests
       ("python-pytz" ,python2-pytz)    ;used by the tests
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
1121
684bf7c7
BW
;; DIAMOND: BLAST-compatible local sequence aligner.  The file "bin/diamond"
;; shipped in the archive is deleted and the program is rebuilt from "src".
(define-public diamond
  (package
    (name "diamond")
    (version "0.7.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hfkcfv9f76h5brbyw9fyvmc0l9cmbsxrcdqk0fa9xv82zj47p15"))
       ;; Delete the "bin/diamond" file that comes with the archive.
       (snippet '(begin
                   (delete-file "bin/diamond")
                   #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; The build is driven from the "src" subdirectory.
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         ;; Copy the freshly built program, located one level above "src",
         ;; into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "../bin/diamond"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)))
    (inputs
     `(("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    ;; diamond fails to build on other platforms
    ;; https://github.com/bbuchfink/diamond/issues/18
    (supported-systems '("x86_64-linux"))
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
1174
365c8153
RW
;; Entrez Direct: command-line access to the NCBI databases.  Only the
;; "edirect.pl" script is installed; it is wrapped so that it finds its Perl
;; modules at run time.
(define-public edirect
  (package
    (name "edirect")
    (version "3.50")
    (source
     (origin
       (method url-fetch)
       ;; Note: older versions are not retained.
       (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.tar.gz")
       (sha256
        (base32
         "1cr3gzcs3flmgnnbj5iz93vh9w0fca1ilzi2q82cl63ln3mwvpz0"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; The configure and build phases are skipped; the script is simply
         ;; copied.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "edirect.pl"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Run 'edirect.pl' with the PERL5LIB that is in effect at
             ;; build time prepended, so all perl inputs are found.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl"))
                   (perl5lib (getenv "PERL5LIB")))
               (wrap-program script
                 `("PERL5LIB" ":" prefix (,perl5lib))))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
1240
e4e5a4d8
RW
;; eXpress: streaming quantification for high-throughput sequencing.  The
;; CMake files are patched to use shared boost libraries and the packaged
;; bamtools instead of a bundled "bamtools" directory.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://bio.math.berkeley.edu/eXpress/downloads/express-"
         version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Turn off static boost linking and point the include path at
             ;; the "bamtools" input.
             (substitute* "CMakeLists.txt"
               (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                "set(Boost_USE_STATIC_LIBS OFF)")
               (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                (string-append (assoc-ref inputs "bamtools")
                               "/include/bamtools")))
             ;; Likewise for the library directory.
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                (string-append (assoc-ref inputs "bamtools")
                               "/lib/bamtools")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
1285
f3674b1c
BW
;; Express Beta Diversity: beta diversity measures between biological
;; communities.  Built and checked inside the "source" subdirectory, then
;; installed by copying the program and one helper script.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Build and check from within "source" ...
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; The check runs the freshly built program with the "-u" option.
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity"
                             "-u"))))
         ;; ... and go back to the top-level directory before installing.
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (install-file "scripts/convertToEBD.py" bindir)
               (install-file "bin/ExpressBetaDiversity" bindir)
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
1330
12b04cbe
BW
;; FastTree: infers approximately-maximum-likelihood phylogenetic trees.
;; The source is a single C file, compiled twice: once as "FastTree" and once
;; with OpenMP enabled as "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32
         "0dzqc9vr9iiiw21y159xfjl2z90vw0y7r4x6456pcaxiy5hd2wmi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; The source is passed to the compiler directly, so there is
         ;; nothing to unpack or configure.
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into PROGRAM; EXTRA-FLAGS go before the
             ;; flags shared by both variants.
             (let ((build-variant
                    (lambda (extra-flags program)
                      (zero? (apply system* "gcc"
                                    (append extra-flags
                                            (list "-O3"
                                                  "-finline-functions"
                                                  "-funroll-loops"
                                                  "-Wall"
                                                  "-o"
                                                  program
                                                  source
                                                  "-lm")))))))
               (and (build-variant '() "FastTree")
                    (build-variant '("-DOPENMP" "-fopenmp")
                                   "FastTreeMP")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (install-file "FastTree" bindir)
               (install-file "FastTreeMP" bindir)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
1389
2127cedb
RW
;; FASTX-Toolkit: command line tools for FASTA/FASTQ preprocessing, built
;; with the GNU build system using its default phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/agordon/fastx_toolkit/releases/download/"
         version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
1421
d7678942
RW
;; Flexbar: barcode and adapter removal tool for sequencing data.
;; FLEXBAR_BINARY_DIR is pointed at the output's "bin" directory and the
;; 'install' phase is removed; the check phase puts that directory on PATH
;; before running the validation script.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/flexbar/"
                       version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32
         "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DFLEXBAR_BINARY_DIR="
                            (assoc-ref %outputs "out")
                            "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make the output's "bin" directory visible to the script.
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The name of the unpacked source directory contains the
             ;; version; derive it from the 'version' field so that it need
             ;; not be edited on every update.
             (chdir ,(string-append "../flexbar_v" version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
1466
19f4554c
BW
;; FragGeneScan: gene prediction in short, error-prone reads.  The scripts
;; and the C source are patched so that the training files are looked up
;; under share/fraggenescan in the output; one of the bundled examples is run
;; after installation as a check.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (share-dir (string-append output
                                              "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Call "rm" and "mv" by the file names that 'which'
                 ;; returns at build time.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share-dir
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \""
                                 share-dir "/train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" share-dir "\";"))))
             #t))
         ;; Build only the "fgs" target, starting from a clean tree.
         (replace 'build
           (lambda _
             (and (zero? (system* "make" "clean"))
                  (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (bindir (string-append output "/bin/"))
                    (train-dir (string-append
                                output "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bindir)
               (install-file "FragGeneScan" bindir)
               (install-file "FGS_gff.py" bindir)
               (install-file "post_process.pl" bindir)
               (copy-recursively "train" train-dir))))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check
           ;; that the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (bindir (string-append output "/bin/")))
               (and (zero? (system* (string-append bindir
                                                   "run_FragGeneScan.pl")
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
1547
81f3e0c1
BW
;; Fxtract: extracts sequences from FASTA and FASTQ files.  The build needs
;; the separate "util" repository of the same author, fetched at the commit
;; bound to UTIL-COMMIT and copied into the "util" directory before building.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the tarball after this package.  It used to be stored under
         ;; the name of the "util" checkout, which was misleading.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Replace the "util" directory of the unpacked sources with the
           ;; contents of the "ctskennerton-util" input.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util")
                                 "util")
               #t))
           ;; Do not use make install as this requires additional
           ;; dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      (license license:expat))))
1612
5854f685
RW
;; GRIT: integrative analysis of RNA-seq type assays.  Built with Python 2;
;; the C files shipped with the sources are regenerated from their Cython
;; sources.
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/nboley/grit/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Delete these C files to force fresh generation from pyx
             ;; sources.
             (delete-file "grit/sparsify_support_fns.c")
             (delete-file "grit/call_peaks_support_fns.c")
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build")
               ;; Add numpy include path to fix compilation.
               (("pyx\", \\]")
                (string-append
                 "pyx\", ], include_dirs = ['"
                 (assoc-ref inputs "python-numpy")
                 "/lib/python2.7/site-packages/numpy/core/include/"
                 "']")))
             #t)))))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
1663
346a829a
RW
;; HISAT: spliced alignment program for RNA-seq reads.  There is no configure
;; script; the Makefile is patched after unpacking and the resulting programs
;; are installed by hand.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             ;; Refer to "env" by the file name 'which' returns.
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install every hisat program into "bin".  This used to say
             ;; "/bi/", which put the programs in a directory that is never
             ;; searched.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
               #t))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
1729
c684629f
BW
;; HMMER: sequence search and alignment based on profile hidden Markov
;; models.  Built with the stock GNU build system phases.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (home-page "http://hmmer.janelia.org")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is "hmmer" followed by the first component
       ;; of the version string.
       (uri (string-append "http://selab.janelia.org/software/hmmer"
                           (version-prefix version 1)
                           "/" version
                           "/hmmer-" version ".tar.gz"))
       (sha256
        (base32 "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    ;; HMMER itself is GPLv3+, while the bundled 'easel' library comes with
    ;; The Janelia Farm Software License.
    (license
     (list license:gpl3+
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
1758
85652f59
RW
;; HTSeq: Python 2 library and scripts for high-throughput sequencing data.
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32 "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; Only Python 2 is supported, hence the python2-* variants below.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    ;; Numpy must be propagated so that htseq stays usable as a Python
    ;; library.
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
1784
15a3c3d4
RW
;; HTSJDK: Java library for SAM, VCF and related formats.  The package is
;; built with ant; the jars are written straight into the output by the
;; build phase, so the configure, install and check phases are dropped.
(define-public htsjdk
  (package
    (name "htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; remove build dependency on git
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system gnu-build-system)
    (arguments
     ;; (srfi srfi-1) provides the `fold' used to drop several phases at once.
     `(#:modules ((srfi srfi-1)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:phases (alist-replace
                 'build
                 (lambda _
                   ;; ant locates the compiler through JAVA_HOME.
                   (setenv "JAVA_HOME" (assoc-ref %build-inputs "jdk"))
                   ;; "-Ddist=" points ant's distribution directory at the
                   ;; output, which is why no install phase is needed.
                   (zero? (system* "ant" "all"
                                   (string-append "-Ddist="
                                                  (assoc-ref %outputs "out")
                                                  "/share/java/htsjdk/"))))
                 (fold alist-delete %standard-phases
                       '(configure install check)))))
    (native-inputs
     `(("ant" ,ant)
       ("jdk" ,icedtea "jdk")))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
1829
e7c09730
RW
;; HTSlib: C library (plus bgzip, htsfile and tabix) for sequencing data.
(define-public htslib
  (package
    (name "htslib")
    (version "1.2.1")
    (home-page "http://www.htslib.org")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The test driver refers to /bin/bash; point it at the bash found
         ;; in the build environment instead.
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               (("/bin/bash")
                (which "bash")))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; Everything is under the Expat license, except for the files below
    ;; cram/, which are under the modified BSD license.
    (license
     (list license:expat license:bsd-3))))
1864
c4325f62
RW
;; IDR: irreproducible discovery rate computation.  The installed "idr"
;; script is wrapped so that it finds scipy, numpy and matplotlib.
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (home-page "https://github.com/nboley/idr")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/nboley/idr/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python-dir (assoc-ref inputs "python"))
                    ;; Take the last five characters of the python
                    ;; directory name and keep the first three of them
                    ;; (presumably "X.Y.Z" -> "X.Y"; this assumes the
                    ;; directory name ends in such a version).
                    (major+minor (string-take
                                  (string-take-right python-dir 5) 3))
                    (site-packages
                     (lambda (label)
                       (string-append (assoc-ref inputs label)
                                      "/lib/python" major+minor
                                      "/site-packages")))
                    (pythonpath
                     (string-join (map site-packages
                                       '("python-scipy"
                                         "python-numpy"
                                         "python-matplotlib"))
                                  ":"))
                    (script (string-append (assoc-ref outputs "out")
                                           "/bin/idr")))
               (wrap-program script
                 `("PYTHONPATH" ":" prefix (,pythonpath))))
             #t)))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-setuptools" ,python-setuptools)))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl3+)))
1914
43c565d2
RW
;; Jellyfish: k-mer counter.  The Ruby and Python bindings are installed
;; into separate outputs.
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.4")
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    (outputs '("out"      ;for library
               "ruby"     ;for Ruby bindings
               "python")) ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Each binding is configured with its own output as the target.
       (let ((ruby-out   (assoc-ref %outputs "ruby"))
             (python-out (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby-out)
               (string-append "--enable-python-binding=" python-out)))
       #:phases
       (modify-phases %standard-phases
         ;; generator_manager.hpp runs the tests through $SHELL, falling
         ;; back to /bin/sh, so SHELL is set explicitly.
         (add-before 'check 'set-SHELL-variable
           (lambda _
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; As a whole the work is GPLv3 or later; some individual files, such as
    ;; lib/jsoncpp.cpp, are under the Expat license.
    (license
     (list license:gpl3+ license:expat))))
1963
d57e6d0f
RW
;; MACS: Model-based Analysis of ChIP-Seq, packaged from the MACS2 PyPI
;; release.
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20140616")
    (home-page "http://github.com/taoliu/MACS/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/M/MACS2/MACS2-"
             version ".tar.gz"))
       (sha256
        (base32 "11lmiw6avqhwn75sn59g4lfkrr2kk20r3rgfbx9xfqb8rg9mi2n6"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f             ; no test target
       #:python ,python-2))    ; only compatible with Python 2.7
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
1994
41ddebdd
BW
;; MAFFT: multiple sequence alignment.  The sources live in the "core"
;; sub-directory and are built with a plain Makefile (no configure script).
;; A few "specialised" tools are deliberately left out of the package.
(define-public mafft
  (package
    (name "mafft")
    (version "7.267")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "1xl6xq1rfxkws0svrlhyqxhhwbv6r77jwblsdpcyiwzsscw6wlk0"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern has to spell
               ;; out "f2cl" exactly as the replacement does, otherwise it
               ;; never matches and mafft-profile is still built.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; comment out the matching "rm" lines for the removed
               ;; programs
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         ;; Replace bare tool names with the full file names that `which'
         ;; finds in the build environment.
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               ;; Only rewrite "awk" when it follows a quote, backtick,
               ;; pipe or space; the preceding character is kept.
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure))))
    (inputs
     `(("perl" ,perl)
       ("gawk" ,gawk)
       ("grep" ,grep)))
    (propagated-inputs
     `(("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
8fd790eb
BW
2069
;; MetaBAT: metagenome binning.  The build is driven by scons, which also
;; installs into the output, so the stock configure, check and install
;; phases are removed.
(define-public metabat
  (package
    (name "metabat")
    (version "0.26.1")
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://bitbucket.org/berkeleylab/metabat/get/"
             version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32 "0vgrhbaxg4dkxyax2kbigak7w0arhqvw0szwp6gd9wmyilc44kfa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Rewrite references to headers under "bam/" so that they point
         ;; to "samtools/" instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "SConstruct"
               (("/include/bam/bam.h")
                "/include/samtools/bam.h"))
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"samtools/kseq.h\""))
             #t))
         (add-after 'unpack 'fix-scons
           (lambda _
             ;; Do not distribute README
             (substitute* "SConstruct"
               (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)")
                ""))
             #t))
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (mkdir prefix)
               ;; A single scons invocation builds and installs.
               (zero?
                (system* "scons"
                         (string-append "PREFIX=" prefix)
                         (string-append "HTSLIB_DIR="
                                        (assoc-ref inputs "htslib"))
                         (string-append "SAMTOOLS_DIR="
                                        (assoc-ref inputs "samtools"))
                         (string-append "BOOST_ROOT="
                                        (assoc-ref inputs "boost"))
                         "install")))))
         ;; check and install carried out during build phase
         (delete 'check)
         (delete 'install))))
    (native-inputs
     `(("scons" ,scons)))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (license
     (license:non-copyleft "file://license.txt"
                           "See license.txt in the distribution."))))
8fd790eb 2147
ddd82e0e
RW
;; MISO: Mixture-of-Isoforms model for RNA-Seq, packaged from the misopy
;; PyPI release.  setup.py is patched in the source snippet.
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32 "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Without setuptools the executables would not be installed.
           (("distutils.core") "setuptools")
           ;; Compile and link with "gcc" rather than "cc".
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f             ; no "test" target
       #:python ,python-2))    ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)    ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
2197
1e44cf8b
BW
;; OrfM: open reading frame caller.  Built with the stock GNU build system
;; phases.
(define-public orfm
  (package
    (name "orfm")
    (version "0.5.3")
    (home-page "https://github.com/wwood/OrfM")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32 "0vb6d771gl4mix8bwx919x5ayy9pkj44n7ki336nz3rz2rx4c7gk"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
2222
19ee9201
RW
;; pbcore: Python 2 APIs for PacBio data files.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "0.9.3")
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/PacificBiosciences/pbcore/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1z46rwjac93jm87cbj2zgjg6qvsgs65140wkbbxsvxps7ai4pm09"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
2251
c61fe02c
RW
;; WarpedLMM: warped linear mixed models for Python 2, packaged from the
;; PyPI zip archive (hence the "unzip" native input).
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (home-page "https://github.com/PMBio/warpedLMM")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         ;; "bin" holds nothing but wrappers that run the module tests;
         ;; once the "check" phase is over they serve no purpose.
         (add-after 'install 'remove-bin-directory
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (delete-file-recursively (string-append out "/bin")))
             #t)))))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)
       ("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
2298
;; pbtranscript-tofu: Iso-Seq analysis scripts, built from a git checkout
;; pinned to COMMIT; the commit id is also appended to the version string.
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." commit))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the bundled Cython source tarball.
             (delete-file
              "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; The default flags make the install phase build a zipped egg,
         ;; which fails with 'ZIP does not support timestamps before 1980'.
         #:configure-flags '("--single-version-externally-managed"
                             "--record=pbtranscript-tofu.txt")
         #:phases
         (modify-phases %standard-phases
           ;; The package to build lives in a sub-directory of the checkout.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; This setup.py hack breaks the build with setuptools 18.0 and
           ;; later, so its condition is forced to false.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)
         ("python-setuptools" ,python2-setuptools)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
2358
af860475
BW
;; Prodigal: gene prediction for Archaea and Bacteria.  Plain Makefile
;; build without a configure script.
(define-public prodigal
  (package
    (name "prodigal")
    (version "2.6.2")
    (home-page "http://prodigal.ornl.gov")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/hyattpd/Prodigal/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0m8sb0fg6lmxrlpzna0am6svbnlmd3dckrhgzxxgb3gxr5fyj284"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       ;; INSTALLDIR is passed to make and points at the output's "bin".
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
2389
66e3eff1
RW
;; RSEM: gene and isoform expression estimation from RNA-Seq data.  The
;; bundled boost is removed from the source and the bundled samtools
;; library is not built; executables are installed by hand and the Perl
;; front-ends are wrapped so that they find rsem_perl_utils.pm.
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                           version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (list (search-patch "rsem-makefile.patch")))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Get rid of the boost copy shipped with the sources.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  This phase is used to stop
         ;; the Makefile from building the bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (string-append (assoc-ref outputs "out")))
                    (bindir  (string-append out "/bin/"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               ;; Every file whose name matches "rsem-.*" goes to "bin".
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bindir
                                                     (basename program))))
                         (find-files "." "rsem-.*"))
               (copy-file "rsem_perl_utils.pm"
                          (string-append perldir "/rsem_perl_utils.pm")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               ;; Prefix PERL5LIB with the directory that the install
               ;; phase copied rsem_perl_utils.pm into.
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             `("PERL5LIB" ":" prefix (,perldir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r" ,r)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
2467
8622a072
RW
;; RSeQC: RNA-seq quality control modules for Python 2.  The bundled pysam
;; is deleted from the source and setup.py is patched accordingly.
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (home-page "http://rseqc.sourceforge.net/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           version "/RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the pysam copy shipped with the sources.
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; The outdated "distribute" module is not needed.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Make setup.py skip its bundled pysam.
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)
       ("zlib" ,zlib)))
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
2510
4e10a221
RW
;; Samtools 1.x.  On top of the regular installation, the static library
;; libbam.a and the top-level header files are installed as well.
(define-public samtools
  (package
    (name "samtools")
    (version "1.3")
    (home-page "http://samtools.sourceforge.net")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "03mnf0mhbfwhqlqfslrhfnw68s3g0fs1as354i9a584mqw1l1smy"))))
    (build-system gnu-build-system)
    (arguments
     ;; (ice-9 ftw) provides `scandir' and (ice-9 regex) provides
     ;; `string-match', both used by the 'install-headers phase.
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags (list "--with-ncurses")
       #:phases
       ;; Clauses are applied top to bottom, so after 'install the order is
       ;; 'install-library followed by 'install-headers.
       (modify-phases %standard-phases
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((header-dir (string-append (assoc-ref outputs "out")
                                              "/include/samtools/"))
                   (header? (lambda (name)
                              (string-match "\\.h$" name))))
               (for-each (lambda (header)
                           (install-file header header-dir))
                         (scandir "." header?))
               #t)))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out") "/lib")))
               (install-file "libbam.a" libdir))))
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test script calls out to /bin/bash
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda 2568
0b84a0aa
RW
;; Last release of the samtools 0.1 series.  Its input and output formats
;; differ greatly from those of samtools 1.x, and many bioinformatics
;; pipelines still rely on it.  Everything not overridden here is inherited
;; from the `samtools' package.
(define-public samtools-0.1
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ;; Add the curses linker setting in front of the inherited make
           ;; flags.
           ((#:make-flags inherited-flags)
            `(cons "LIBCURSES=-lncurses" ,inherited-flags))
           ;; Start from the inherited phases: install the "samtools"
           ;; executable by hand and drop the phases that are removed
           ;; below.
           ((#:phases inherited-phases)
            `(modify-phases ,inherited-phases
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out    (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (copy-file "samtools"
                                (string-append bindir "/samtools")))))
               (delete 'patch-tests)
               (delete 'configure))))))))
0b84a0aa 2599
fe4c37c2
RW
(define-public mosaik
  ;; Upstream provides neither release tarballs nor tags, so the source is
  ;; fetched from git at a fixed commit.
  (let ((commit "5c25216d"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK.git")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list "CC=gcc")
         #:tests? #f                    ; no tests
         #:phases
         (modify-phases %standard-phases
           ;; The 'configure' phase only enters the "src" directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy everything from "../bin" into the output's "bin"
           ;; directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://code.google.com/p/mosaik-aligner/")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
2646
282c5087
RW
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.2.3")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "15074fdi94c6pjy83hhk22r86kfvzpaz2i07h3rqg9yy6x3w0pk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "ngs-sdk" sub-directory of the unpacked sources.
         ;; Packages inheriting from this one may replace this phase to
         ;; enter a different sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system* "./configure"
                               (string-append "--build-prefix=" (getcwd) "/build")
                               (string-append "--prefix=" out)))))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
2692
2651a5e6
RW
(define-public ngs-java
  ;; Same sources and build recipe as 'ngs-sdk', but built from the
  ;; "ngs-java" sub-directory.
  (package (inherit ngs-sdk)
    (name "ngs-java")
    (arguments
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases parent-phases)
        `(modify-phases ,parent-phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
2710
75dd2424
RW
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.5.7")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hay5hy8ynva3mi5wbn4wmq1q23qwxc3aqzbb86hg3x4f1r73270"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-ngs-java-prefix="
                                      (assoc-ref inputs "ngs-java"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir-p ilib)
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t))))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("ngs-java" ,ngs-java)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
2796
cc6ed477
RW
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (list (search-patch "plink-1.07-unclobber-i.patch")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "LIB_LAPACK="
                            (assoc-ref %build-inputs "lapack")
                            "/lib/liblapack.so")
             "WITH_LAPACK=1"
             "FORCE_DYNAMIC=1"
             ;; disable phoning home
             "WITH_WEBCHECK=")
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Copy the "plink" executable into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
2848
c6a24d6e
RW
(define-public smithlab-cpp
  ;; The version string is assembled from a revision counter and the
  ;; abbreviated commit identifier.
  (let ((revision "1")
        (commit "728a097"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Rewrite references to "sam.h" so that they point into the
           ;; "samtools/" include sub-directory.
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           ;; Object files go to "lib", C++ headers to
           ;; "include/smithlab-cpp".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (libdir  (string-append out "/lib"))
                      (headers (string-append out "/include/smithlab-cpp")))
                 (mkdir-p libdir)
                 (mkdir-p headers)
                 (for-each (lambda (object)
                             (install-file object libdir))
                           (find-files "." "\\.o$"))
                 (for-each (lambda (header)
                             (install-file header headers))
                           (find-files "." "\\.hpp$")))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
2901
56e373ef
RW
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/"
                           "preseq/archive/v" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
       (modules '((guix build utils)))
       ;; Remove bundled samtools.
       (snippet
        '(delete-file-recursively "samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Point the Makefile at the samtools and smithlab-cpp inputs.
       #:make-flags
       (let ((out      (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
2950
51c64999
RW
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.5.7")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0q93qg744x787d08qmjmdafki1wkbvkdwynayjnjd454gkd26jl5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources
             ;; and raw build output of ncbi-vdb, including files that are
             ;; not installed.  Since we are building against an installed
             ;; version of ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (zero? (system*
                     "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     ;; (string-append "--with-xml2-prefix="
                     ;;                (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5")))))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
3025
d3517eda
RW
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar  (assoc-ref %build-inputs "tar"))
               (bzip (assoc-ref %build-inputs "bzip2"))
               (out  (assoc-ref %outputs "out"))
               (doc  (assoc-ref %outputs "doc")))
           (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
           ;; Stop right away when unpacking fails instead of carrying on
           ;; with a missing or incomplete source tree.
           (unless (zero? (system* "tar" "xvf"
                                   (assoc-ref %build-inputs "source")))
             (error "failed to unpack the source tarball"))
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", the contents of "share" to "doc".
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5 3068
d708b7a9
BW
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/s/seqmagick/seqmagick-"
             version ".tar.gz"))
       (sha256
        (base32
         "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
    (build-system python-build-system)
    (arguments
     ;; Only Python 2 is supported; see
     ;; https://github.com/fhcrc/seqmagick/issues/56
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The test command in setup.py is broken as of 0.6.1, so the
         ;; tests are run with nose for now.  See
         ;; https://github.com/fhcrc/seqmagick/issues/55
         (replace 'check
           (lambda _
             (zero? (system* "nosetests")))))))
    (inputs
     `(("python-biopython" ,python2-biopython)))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)
       ("python-nose" ,python2-nose)))
    (home-page "http://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
3106
5f7e17be
BW
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the "unit_tests" executable from the build directory.
         (replace 'check (lambda _ (zero? (system* "./unit_tests"))))
         ;; Install the two executables by hand.  'install-file' creates
         ;; the target directory itself.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library rather than a build-time tool, so it is listed as a
    ;; regular input like in the other packages of this module.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    (license license:asl2.0)))
3143
bcadaf00
BW
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1670a92x1vvkacnvgr2i5xac3ls6lp4pc3n0bccnmllsnymggcf0"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The two executables go to the "out" output; the files found
         ;; under "rRNA_databases" go to the "db" output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (dbdir  (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("sortmerna" "indexdb_rna"))
               (for-each (lambda (database)
                           (install-file database dbdir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    (license license:lgpl3)))
3186
ce7155d5
RW
(define-public star
  (package
    (name "star")
    (version "2.5.1b")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wzcfhkg10apnh0y73xlarfa79xxwxdizicbdl11wb48awk44iq4"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of the "Depend.list" rule.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn includes of the bundled copy into system includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot of the ".h" suffix is escaped here as well, so that
             ;; only header names are matched, like in the pattern above.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("vim" ,vim))) ; for xxd
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
de07c0db 3257
dbf4ed7c
RW
(define-public subread
  (package
    (name "subread")
    (version "1.4.6-p2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/subread/subread-"
                    version "-source.tar.gz"))
              (sha256
               (base32
                "06sv9mpcsdj6p68y15d6gi70lca3lxmzk0dn61hg0kfsa7rxmsr3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that
       ;; x86_64 flags are only added when the build target is an x86_64
       ;; system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               (string-append "CCFLAGS="
                              (string-join
                               (if (string-prefix? "x86_64" system)
                                   (append flags flags64)
                                   flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         ;; Copy everything from "../bin" into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin)
               #t)))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
3313
de07c0db
RW
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.12b")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/vcftools/vcftools_"
                    version ".tar.gz"))
              (sha256
               (base32
                "148al9h7f8g8my2qdnpax51kdd2yjrivlx6frvakf4lz5r8j88wx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no "check" target
       #:make-flags (list
                     "CFLAGS=-O2" ; override "-m64" flag
                     (string-append "PREFIX=" (assoc-ref %outputs "out"))
                     (string-append "MANDIR=" (assoc-ref %outputs "out")
                                    "/share/man/man1"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Make the Makefile copy the manual page from the "cpp" directory
         ;; of the build tree instead of from "${PREFIX}/cpp".
         (add-after 'unpack 'patch-manpage-install
           (lambda _
             (substitute* "Makefile"
               (("cp \\$\\{PREFIX\\}/cpp/vcftools.1") "cp ./cpp/vcftools.1"))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://vcftools.sourceforge.net/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at http://vcftools.sourceforge.net/license.html
    (license license:lgpl3)))
9c38b540 3354
a2950fa4
BW
(define-public vsearch
  (package
    (name "vsearch")
    (version "1.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0b1359wbzgb2cm04h7dq05v80vik88hnsv298xxd1q1f2q4ydni7"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Build against the cityhash input instead of the bundled
           ;; copy, and drop the '-mtune=native' compiler flag.
           (substitute* "src/Makefile.am"
             (("^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash \
-O3 -mtune=native -Wall -Wsign-compare")
              "AM_CXXFLAGS=-lcityhash -O3 -Wall -Wsign-compare")
             (("^__top_builddir__bin_vsearch_SOURCES = cityhash/city.h \\\\")
              "__top_builddir__bin_vsearch_SOURCES = \\")
             (("^cityhash/config.h \\\\") "\\")
             (("^cityhash/city.cc \\\\") "\\"))
           (substitute* "src/vsearch.h"
             (("^\\#include \"cityhash/city.h\"")
              "#include <city.h>"))
           (delete-file-recursively "src/cityhash")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run autoreconf before the 'configure' phase.
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
3414
9c38b540
PP
;; Ruby gem; the tarball location is computed by `rubygems-uri' from the gem
;; name and VERSION.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    ;; rspec is declared as a build-time (native) input only.
    (native-inputs `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
edb15985 3438
b2bddb07
PP
;; Ruby gem; the tarball location is computed by `rubygems-uri' from the gem
;; name and VERSION.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    ;; Libraries the gem needs at run time; propagated so they end up in the
    ;; same profile as this package.
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; rspec is a test framework, so it is a build-time (native) input, in
    ;; line with how `bio-locus' declares it.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
3462
edb15985
PP
;; BioRuby is published on RubyGems under the gem name "bio".
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.0")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "01k2fyjl5fpx4zn8g6gqiqvsg2j1fgixrs9p03vzxckynxdq3wmc"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))  ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The test suite hard-codes /bin/sh, /bin/ls, /bin/echo and a bare
         ;; "which"; point them at the programs found in PATH instead.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; All clauses are applied in order to each line, which gives
             ;; the same result as three separate passes over the file.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; Plain two-element list of files.  The stray `,' that used to
             ;; follow the first string only worked because this code sits
             ;; inside the enclosing quasiquote.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
a5002ae7 3505
9fba89e8
RW
;; CRAN package; the `upstream-name' property records the mixed-case name
;; that is also passed to `cran-uri'.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.15.11")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32
         "1dl4drhjyazwm9wxlm8yfppwvvj4h6jxwmz8kfw5bxpb3jdnsqvy"))))
    (properties '((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (home-page "http://cran.r-project.org/web/packages/ACSNMineR")
    (license license:gpl2+)))
3531
d29b25c4
RW
;; Bioconductor package with no declared inputs.  The properties record the
;; upstream (mixed-case) name and the repository it comes from.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32
         "0f16ryy5f012hvksrwlmm33bcl7lw97i2jvhbnwfwl03j4w7nhc1"))))
    (properties
     '((upstream-name . "BiocGenerics")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (home-page "http://bioconductor.org/packages/BiocGenerics")
    (license license:artistic2.0)))
3552
7485129e
RW
;; Bioconductor package; depends only on r-biocgenerics.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.8.5")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32
         "10f4jxwlwsiy7zhb3kgp6anid0d7wkvrrljl80r3nhx38yr24l5k"))))
    (properties
     '((upstream-name . "S4Vectors")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (home-page "http://bioconductor.org/packages/S4Vectors")
    (license license:artistic2.0)))
3580
78addcb0
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32
         "00x0266sys1fc5ipa639y84p6m6mgspk2xb099vcwmd3w4hypj9d"))))
    (properties
     '((upstream-name . "IRanges")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (home-page "http://bioconductor.org/packages/IRanges")
    (license license:artistic2.0)))
3610
bf7764b7
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32
         "1j2n1v1mrw1fxn7cyffz112pm76wd6gy9q9qwlsfv3brbsqbvdbf"))))
    (properties
     '((upstream-name . "GenomeInfoDb")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (home-page "http://bioconductor.org/packages/GenomeInfoDb")
    (license license:artistic2.0)))
3637
0e7d5560
RW
;; Bioconductor package.  The references to the `zlibbioc' R package are
;; stripped and the `zlib' input is used instead.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "0havwyr6xqk7w0rmbwfj9jq1djz7wzdz7w39adhklwzwz9l4ih3a"))))
    (properties
     `((upstream-name . "XVector")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Drop zlibbioc from the declared dependencies ...
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             ;; ... and from the namespace imports.
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/XVector")
    ;; Fixed typo: "manpulation" -> "manipulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
3674
e2cd1d0f
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.22.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "1jffvcs0jsi7q4l3pvjj6r73vll80csgkljvhqp0g2ixc43jjng9"))))
    (properties
     `((upstream-name . "GenomicRanges")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The first sentence used to read "provides tools to ... is playing a
    ;; central role", which does not parse; it has been reworded.
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, which play a central role when it comes
to analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
3702
555e3399
RW
;; Bioconductor package; depends only on r-biocgenerics.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32
         "1qasjpq3kw8h7qw8cin3bjvv1256hqr1mm24fq3v0ymxzlb66szi"))))
    (properties '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (home-page "http://bioconductor.org/packages/Biobase")
    (license license:artistic2.0)))
3724
8b7bce74
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.32.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32
         "08ncdjvq0l44kqyiv32kn9wnbw1xgfb6qjfzfbjpqrcfp1jygz9j"))))
    (properties '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (home-page "http://bioconductor.org/packages/AnnotationDbi")
    (license license:artistic2.0)))
3751
c465fa72
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.26.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "1s709055abj2gd35g6nnk5d2ai5ii09iir270l2xika6pi62gj3f"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second; the
    ;; two arguments used to be swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
3781
e91d362e
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.4.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32
         "1f5mndx66vampcsq0n66afg6x851crl0h3nyv2nyp9bsgzj9cdzq"))))
    (properties '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)))
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (home-page "http://bioconductor.org/packages/BiocParallel")
    (license (list license:gpl2+ license:gpl3+))))
3805
bf159353
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.38.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32
         "1afp9szc8ci6jn0m3hrrqh6df65cpw3v1dcnl6xir3d3m3lwwmk4"))))
    (properties '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (home-page "http://bioconductor.org/packages/Biostrings")
    (license license:artistic2.0)))
3831
f8d74f70
RW
;; Bioconductor package.  The references to the `zlibbioc' R package are
;; stripped and the `zlib' input is used instead.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rsamtools" version))
              (sha256
               (base32
                "1yc3nzzms3igjwr4l9yd3wdac95glcs08b4cfp7disyly0wcskjd"))))
    (properties
     `((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Drop zlibbioc from the declared dependencies ...
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             ;; ... and from the namespace imports.
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    ;; Short package URL, the same form the other Bioconductor packages in
    ;; this file use.
    (home-page "http://bioconductor.org/packages/Rsamtools")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
3875
6e76dda2
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32
         "0w1dwp99p6i7sc3cn0ir3dr8ksgxwjf16675h5i8n6gbv4rl9lz6"))))
    (properties '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (home-page "http://bioconductor.org/packages/SummarizedExperiment")
    (license license:artistic2.0)))
3904
d8a828af
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32
         "03pxzkmwcpl0d7a09ahan0nllfv7qw2i7w361w6af2s4n3xwrniz"))))
    (properties '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (home-page "http://bioconductor.org/packages/GenomicAlignments")
    (license license:artistic2.0)))
3937
317755ff
RW
;; Bioconductor package.  The references to the `zlibbioc' R package are
;; stripped and the `zlib' input is used instead.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32
         "1if31hg56islx5vwydpgs5gkyas26kyvv2ljv1c7jikpm62w14qv"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove zlibbioc from the dependency list ...
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             ;; ... and from the namespace imports.
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (home-page "http://bioconductor.org/packages/rtracklayer")
    (license license:artistic2.0)))
3982
2fd7c049
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.22.7")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32
         "1jb4s49ar5j9qslpd3kfdg2wrl4q7ciysd55h9a7zvspymxcngq8"))))
    (properties '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (home-page "http://bioconductor.org/packages/GenomicFeatures")
    (license license:artistic2.0)))
4023
fb25d880
RW
;; Bioconductor annotation package; depends only on r-annotationdbi.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GO.db" version))
       (sha256
        (base32
         "00gariag9ampz82dh0xllrc26r85d7vdcwc0vca5zdy147rwxr7f"))))
    (properties '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (home-page "http://bioconductor.org/packages/GO.db")
    (license license:artistic2.0)))
4045
d547ce5e
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32
         "029j9nb39b8l9xlzsp83pmjr8ap247aia387yzaa1yyw8klapdaf"))))
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-sparsem" ,r-sparsem)))
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    (home-page "http://bioconductor.org/packages/topGO")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
4074
c63cef66
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32
         "130w0m6q8kkca7gyz1aqj5jjhalwvwi6rk2yvbjrnj4gpnncyrd2"))))
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (home-page "http://bioconductor.org/packages/BSgenome")
    (license license:artistic2.0)))
4104
c43a011d
RW
;; Bioconductor package that needs a Fortran compiler to build.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "impute" version))
              (sha256
               (base32
                "0y4x5jk7gsf4xn56jrkdcdnxpcfll4h6ivncd7n4snmzixldvmvw"))))
    (build-system r-build-system)
    ;; gfortran is a compiler that runs on the build machine, so it is a
    ;; native input rather than a regular one.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "http://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
4124
03ea5a35
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32
         "0p9zj6bic7sa0hb2bjm988kkk5n9r1kvlbqkzvy702f642n0j53i"))))
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-plotrix" ,r-plotrix)))
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (home-page "http://bioconductor.org/packages/seqPattern")
    (license license:gpl3+)))
4150
cb933df6
RW
;; Bioconductor package; its R dependencies are propagated.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32
         "1mzs995snwim13qk9kz4q3nczpnbsy1allwp4whfq0cflg2mndfr"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-seqpattern" ,r-seqpattern)))
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (license license:artistic2.0)))
4193
a5002ae7
AE
;; CRAN package.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.38-4")
    (source
     (origin
       (method url-fetch)
       ;; Use the shared `cran-uri' helper, as `r-acsnminer' does, instead of
       ;; assembling the mirror URL by hand.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "0rv9xhp8lyldpgwxqirhyjqvg07dr5x4x1x2jpyj37dada9ccyx3"))))
    (build-system r-build-system)
    (home-page "http://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl can estimate genetic maps,
identify genotyping errors, and perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))