guix system: Restore load path after running the activation script.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
CommitLineData
cf6edaba 1
4e10a221 2;;; GNU Guix --- Functional package management for GNU
0047d26a 3;;; Copyright © 2014, 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
9b9b7ffd 4;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
a5002ae7
AE
5;;; Copyright © 2015 Pjotr Prins <pjotr.guix@thebird.nl>
6;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
4e10a221
RW
7;;;
8;;; This file is part of GNU Guix.
9;;;
10;;; GNU Guix is free software; you can redistribute it and/or modify it
11;;; under the terms of the GNU General Public License as published by
12;;; the Free Software Foundation; either version 3 of the License, or (at
13;;; your option) any later version.
14;;;
15;;; GNU Guix is distributed in the hope that it will be useful, but
16;;; WITHOUT ANY WARRANTY; without even the implied warranty of
17;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;;; GNU General Public License for more details.
19;;;
20;;; You should have received a copy of the GNU General Public License
21;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
22
23(define-module (gnu packages bioinformatics)
24 #:use-module ((guix licenses) #:prefix license:)
25 #:use-module (guix packages)
8e913213 26 #:use-module (guix utils)
4e10a221 27 #:use-module (guix download)
2c16316e 28 #:use-module (guix git-download)
4e10a221 29 #:use-module (guix build-system gnu)
d7678942 30 #:use-module (guix build-system cmake)
365c8153 31 #:use-module (guix build-system perl)
8622a072 32 #:use-module (guix build-system python)
a5002ae7 33 #:use-module (guix build-system r)
9c38b540 34 #:use-module (guix build-system ruby)
d3517eda 35 #:use-module (guix build-system trivial)
4e10a221 36 #:use-module (gnu packages)
a2950fa4 37 #:use-module (gnu packages autotools)
684bf7c7 38 #:use-module (gnu packages algebra)
d3517eda 39 #:use-module (gnu packages base)
e4e5a4d8 40 #:use-module (gnu packages boost)
4e10a221 41 #:use-module (gnu packages compression)
82c370de 42 #:use-module (gnu packages cpio)
1baee943 43 #:use-module (gnu packages curl)
75dd2424 44 #:use-module (gnu packages file)
02f35bb5 45 #:use-module (gnu packages gawk)
2409f37f 46 #:use-module (gnu packages gcc)
15a3c3d4 47 #:use-module (gnu packages java)
51c64999 48 #:use-module (gnu packages linux)
36742f43 49 #:use-module (gnu packages machine-learning)
c833ab55 50 #:use-module (gnu packages maths)
6c2b26e2 51 #:use-module (gnu packages mpi)
4e10a221 52 #:use-module (gnu packages ncurses)
81f3e0c1 53 #:use-module (gnu packages pcre)
4e10a221
RW
54 #:use-module (gnu packages perl)
55 #:use-module (gnu packages pkg-config)
bfe3c685 56 #:use-module (gnu packages popt)
e4e5a4d8 57 #:use-module (gnu packages protobuf)
346a829a 58 #:use-module (gnu packages python)
9c38b540 59 #:use-module (gnu packages ruby)
c833ab55 60 #:use-module (gnu packages statistics)
d7678942 61 #:use-module (gnu packages tbb)
2127cedb 62 #:use-module (gnu packages textutils)
43c565d2 63 #:use-module (gnu packages time)
a2950fa4 64 #:use-module (gnu packages tls)
ce7155d5 65 #:use-module (gnu packages vim)
365c8153 66 #:use-module (gnu packages web)
c833ab55 67 #:use-module (gnu packages xml)
f7283db3
RW
68 #:use-module (gnu packages zip)
69 #:use-module (srfi srfi-1))
4e10a221 70
8dc797fa
BW
;; ARAGORN: tRNA/mtRNA/tmRNA gene detection.  There is no upstream build
;; system in use here: the 'build' phase compiles the versioned C file
;; directly with gcc and the 'install' phase copies the results by hand.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.36")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "1dg7jlz1qpqy88igjxd6ncs11ccsirb36qv1z01a0np4i4jh61mb"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; The 'build' phase below invokes gcc directly, so no configure
         ;; step is run.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The source file name embeds the package version.
             (let ((c-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc"
                               "-O3"
                               "-ffast-math"
                               "-finline-functions"
                               "-o"
                               "aragorn"
                               c-file)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (mandir (string-append prefix "/share/man/man1")))
               ;; 'install-file' creates the target directory when needed.
               (install-file "aragorn" bindir)
               (install-file "aragorn.1" mandir))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
118
9794180d
RW
;; BamTools: C++ API and command-line toolkit for BAM files, built with
;; CMake.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Pass an rpath for <out>/lib/bamtools to the linker through
             ;; LDFLAGS.
             (setenv "LDFLAGS"
                     (string-append "-Wl,-rpath="
                                    (assoc-ref outputs "out")
                                    "/lib/bamtools"))
             ;; 'setenv' has no useful return value; report success
             ;; explicitly like the other phases in this file.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
151
8dd4ff11
RW
;; BEDOPS: genomic feature operations.  The build uses the in-tree copies of
;; jansson, zlib and bzip2 (see the FIXME below), which are unpacked by a
;; dedicated phase right after 'unpack'.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; result of the extraction is the result of the phase, so a
             ;; failing 'tar' invocation aborts the build instead of being
             ;; silently ignored.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t))))
         ;; The standard 'configure' phase is not run for this package.
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
209
81de5647
RW
;; bedtools: genome arithmetic utilities.  The 'configure' phase is dropped
;; and 'install' is replaced by a phase that copies everything found under
;; "bin" into the output.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.25.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1ywcy3yfwzhl905b51l0ffjia55h75vv3mw5xkvib04pp6pj548m"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/")))
               ;; Copy every file found in the build tree's "bin" directory.
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "bin" ".*"))
               #t))))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
248
a2fb1492
RW
;; pybedtools: Python 2 wrapper around the BEDtools programs.  bedtools and
;; samtools are propagated inputs of this package.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
281
f7283db3
RW
;; BioPerl with a minimal set of inputs.  The names of all transitive target
;; inputs are computed here, on the host side, and spliced into the build
;; code so that the 'wrap-programs' phase can look them up in
;; %build-inputs.
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         ;; Package names of the de-duplicated transitive target inputs of
         ;; every package listed above.
         (closure-names
          (map (compose package-name cadr)
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs cadr)
                            perl-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.6.924")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://cpan/authors/id/C/CJ/CJFIELDS/BioPerl-"
                             version ".tar.gz"))
         (sha256
          (base32
           "1l3npcvvvwjlhkna9dndpfv1hklhrgva013kw96m0n1wpd37ask1"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin/"))
                      (site-perl (string-append out "/lib/perl5/site_perl"))
                      (input-dirs (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',closure-names))
                      (perl5lib (string-join (cons site-perl input-dirs)
                                             ":")))
                 ;; Only the ".pl" scripts under "bin" are wrapped.
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files bindir "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
342
85c37e29
RW
;; Biopython for the default Python of 'python-build-system'.  The Python 2
;; variant is derived from this definition with 'package-with-python2', so
;; every input here must be the non-"python2-" variant.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.66")
    (source (origin
              (method url-fetch)
              ;; use PyPi rather than biopython.org to ease updating
              (uri (pypi-uri "biopython" version))
              (sha256
               (base32
                "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     ;; This must not be 'python2-setuptools': the package is not built with
     ;; Python 2.
     `(("python-setuptools" ,python-setuptools)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
369
;; Python 2 variant of python-biopython, with the 'inputs' field overridden
;; to list python2-numpy.
(define-public python2-biopython
  (let ((base (package-with-python2 python-biopython)))
    (package
      (inherit base)
      (inputs
       `(("python2-numpy" ,python2-numpy))))))
374
82c370de
RW
;; NCBI BLAST+.  The source snippet removes the bundled bzip2 and zlib; the
;; build enters the "c++" sub-directory, rewrites hard-coded tool paths and
;; runs the upstream "configure.orig" script by hand.  The result is split
;; over the "out", "lib" and "include" outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.2.31")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
                    version "/ncbi-blast-" version "+-src.tar.gz"))
              (sha256
               (base32
                "19gq6as4k1jrgsd26158ads6h7v4jca3h4r5dzg1y0m6ya50x5ph"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled bzip2 and zlib
                  (for-each delete-file-recursively
                            '("c++/src/util/compress/bzip2"
                              "c++/src/util/compress/zlib"))
                  (substitute* "c++/src/util/compress/Makefile.in"
                    (("bzip2 zlib api") "api"))
                  ;; Remove useless msbuild directory
                  (delete-file-recursively
                   "c++/src/build-system/project_tree_builder/msbuild")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are three(!) tests for this massive library, and all fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the command CMD: a fixed
             ;; invocation for "date", otherwise the absolute file name found
             ;; by 'which'.  When nothing is found, print a warning and
             ;; return #f.
             (define (which* cmd)
               (if (string=? cmd "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Files with hard-coded "/usr/bin/..." or "/bin/..." tool paths.
             (define files-with-tool-paths
               (append '("src/build-system/configure.ac"
                         "src/build-system/configure"
                         "scripts/common/impl/if_diff.sh"
                         "scripts/common/impl/run_with_lock.sh"
                         "src/build-system/Makefile.configurables.real"
                         "src/build-system/Makefile.in.top"
                         "src/build-system/Makefile.meta.gmake=no"
                         "src/build-system/Makefile.meta.in"
                         "src/build-system/Makefile.meta_l"
                         "src/build-system/Makefile.meta_p"
                         "src/build-system/Makefile.meta_r"
                         "src/build-system/Makefile.mk.in"
                         "src/build-system/Makefile.requirements"
                         "src/build-system/Makefile.rules_with_autodep.in")
                       (find-files "scripts/common/check" "\\.sh$")))

             ;; Rewrite hardcoded paths to various tools; the original text
             ;; is kept when no replacement is available.
             (substitute* files-with-tool-paths
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                (or (which* cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (libdir (string-append (assoc-ref outputs "lib") "/lib"))
                    (includedir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" prefix)
                           (string-append "--libdir=" libdir)
                           (string-append "--includedir=" includedir)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           ;; Each library is built twice by default, once
                           ;; with "-static" in its name, and again
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (apply system* "./configure.orig" flags))))))))
    (outputs '("out"       ;  19 MB
               "lib"       ; 203 MB
               "include")) ;  32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
521
6c2b26e2
RW
;; BLESS: Bloom-filter-based error correction for NGS reads.  The bundled
;; boost, pigz and zlib copies are removed in the source snippet; pigz is
;; referenced from the inputs and kmc is installed alongside bless.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, zlib, and .git directory
                  ;; FIXME: also remove bundled sources for google-sparsehash,
                  ;; murmurhash3, kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The pigz directory was deleted by the source snippet, so the
             ;; Makefile must not try to build it.
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Hard-code the locations of the helper programs: kmc is
             ;; installed into this package's own output, pigz comes from the
             ;; inputs.
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
604
2c7ee167
RW
;; Bowtie 2 read aligner.  The 'configure' phase is dropped and the 'check'
;; phase runs the upstream Perl test driver against the freshly built
;; binaries.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.6")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1ssfvymxfrap6f9pf86s9bvsbqdgka4abr2r7j3mgr4w1l289m86"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           (lambda _
             ;; 'system*' returns the exit status, and any integer is true
             ;; in Scheme.  Compare against zero so that a failing test run
             ;; fails the phase.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
658
94ce537e
RW
;; TopHat spliced read mapper.  The bundled SeqAn and samtools sources are
;; deleted in the source snippet; a build phase then rewires the build to
;; the samtools found in the inputs.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                    version ".tar.gz"))
              (sha256
               (base32
                "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
              (patches (list (search-patch "tophat-build-with-later-seqan.patch")))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled SeqAn and samtools
                  (for-each delete-file-recursively
                            '("src/SeqAn-1.3"
                              "src/samtools-0.1.18"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Remove the rules and variables that refer to the bundled
             ;; samtools from the Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ lhs) lhs)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Point the header includes at the "samtools/" include
             ;; sub-directory, for both the quoted and the angle-bracket
             ;; form.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             ;; Replace the name of the bundled executable with the file
             ;; name of the samtools program found by 'which'.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
725
9a8336d8
RW
;; BWA: Burrows-Wheeler sequence aligner.  There is no "configure" script
;; and the 'install' phase is replaced by one that copies the binary, the
;; README and the man page into the output.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               ;; 'install-file' creates the target directories itself, so
               ;; no separate 'mkdir-p' calls are needed.
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               #t))))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
775
ad641d53
RW
;; bx-python for Python 2.  The source snippet strips the use of the
;; "distribute_setup" bootstrap module from setup.py; tests are disabled.
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; remove dependency on outdated "distribute" module
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; tests fail because test data are not included
       #:tests? #f))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
810
810cff85
RW
;; CLIPper: peak definition for CLIP-seq data sets.  Built with Python 2;
;; the source snippet drops the "setup_requires" line from setup.py.
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
       (modules '((guix build utils)))
       (snippet
        ;; remove unnecessary setup dependency
        '(substitute* "setup.py"
           (("setup_requires = .*") "")))))
    (build-system python-build-system)
    ;; only Python 2 is supported
    (arguments `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)    ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
849
36742f43
RW
;; COUGER ships as a zip of Python sources: nothing is configured or
;; compiled, the files are copied into the output and the launcher wrapped.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://couger.oit.duke.edu/static/assets/COUGER"
                           version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((target (assoc-ref outputs "out"))
                    (launcher (string-append target "/bin/couger")))
               (copy-recursively "src" (string-append target "/src"))
               (mkdir (string-append target "/bin"))
               ;; Have the script append the output directory (which now
               ;; holds "src") to Python's module search path before its
               ;; first "from argparse" import.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 target "\")\nfrom argparse")))
               (copy-file "couger" launcher))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Freeze the build-time PYTHONPATH into the installed
             ;; launcher so it sees the same Python modules at run time.
             (let ((launcher (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
919
bfe3c685
RW
;; Clustal Omega: a plain GNU build whose only declared input is argtable.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
943
191c7101
RW
;; CrossMap, built against the pysam input instead of the bundled copy.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32 "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
       ;; The patch was submitted upstream and can be downloaded from
       ;; Sourceforge, but it has not been merged.
       (patches
        (list (search-patch "crossmap-allow-system-pysam.patch")))
       (modules '((guix build utils)))
       ;; Remove the bundled pysam sources.
       (snippet
        '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Export CROSSMAP_USE_SYSTEM_PYSAM=1 for the rest of the build;
         ;; presumably the patch above makes setup.py honour it.
         (add-after 'unpack 'set-env
           (lambda _ (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1"))))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
986
3a40a92c
RW
;; Cufflinks, built against samtools 0.1, eigen and boost.
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32 "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-bam="
                            (assoc-ref %build-inputs "samtools")))
       #:make-flags
       (list
        ;; Eigen's headers live in an "eigen3" subdirectory of include/.
        (string-append "EIGEN_CPPFLAGS="
                       "-I" (assoc-ref %build-inputs "eigen")
                       "/include/eigen3/")
        ;; Several boost libraries must be linked explicitly.
        (string-append "LDFLAGS="
                       (string-join '("-lboost_system"
                                      "-lboost_serialization"
                                      "-lboost_thread")
                                    " ")))
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the <bam/...> includes to the <samtools/...> names
         ;; (including version.hpp -> version.h).
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             (substitute* '("ax_bam.m4"
                            "configure"
                            "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (license license:boost1.0)))
1044
8e913213
RW
;; Cutadapt; its test suite is run against the installed package.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/marcelm/cutadapt/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests have to run after installation, so drop the standard
         ;; 'check phase and add a replacement after 'install.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python-dir (assoc-ref inputs "python"))
                    ;; First three of the last five characters of the
                    ;; python input path; presumably "X.Y" taken from a
                    ;; trailing "X.Y.Z" -- TODO confirm.
                    (major+minor (string-take
                                  (string-take-right python-dir 5) 3))
                    (site-packages
                     (string-append (assoc-ref outputs "out")
                                    "/lib/python" major+minor
                                    "/site-packages")))
               ;; Let nosetests import the freshly installed modules.
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" site-packages))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
1084
1baee943
RW
;; libBigWig, built from a plain Makefile (no configure script).
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Strip the testRemote invocation from the Makefile before the
         ;; tests run.
         (add-before 'check 'disable-curl-test
           (lambda _
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; Upstream commit 4ff6959cd8a0 fixes this, but no release
         ;; contains it yet: create the install directories by hand.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (subdir)
                           (mkdir-p (string-append prefix subdir)))
                         '("/lib" "/include"))
               #t))))))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
1130
1921b1de
RW
;; deepTools, built with Python 2; its Python libraries are propagated.
(define-public deeptools
  (package
    (name "deeptools")
    (version "1.5.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1kaagygcbvjs9sxd9cqmskd02wcfp9imvb735r087w7hwqpvz6fs"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     ;; python-mock and python-pytz are needed by the test suite.
     `(("python-mock" ,python2-mock)
       ("python-pytz" ,python2-pytz)
       ("python-setuptools" ,python2-setuptools)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
1169
684bf7c7
BW
;; DIAMOND, built from the "src" subdirectory of the release archive.
(define-public diamond
  (package
    (name "diamond")
    (version "0.7.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0hfkcfv9f76h5brbyw9fyvmc0l9cmbsxrcdqk0fa9xv82zj47p15"))
       ;; Delete the "bin/diamond" file shipped in the archive.
       (snippet
        '(begin
           (delete-file "bin/diamond")
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;there is no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         ;; Copy the built executable (one level above "src") into the
         ;; output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out")
                                           "/bin"))
                    (destination (string-append bindir "/diamond")))
               (mkdir-p bindir)
               (copy-file "../bin/diamond" destination)
               #t))))))
    (native-inputs
     `(("bc" ,bc)))
    (inputs
     `(("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    ;; Building fails on other platforms, see
    ;; https://github.com/bbuchfink/diamond/issues/18
    (supported-systems '("x86_64-linux"))
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
1222
365c8153
RW
;; Entrez Direct: a single Perl script that is copied into place and wrapped.
(define-public edirect
  (package
    (name "edirect")
    (version "3.50")
    (source
     (origin
       (method url-fetch)
       ;; Note: upstream does not keep older versions around.
       (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.tar.gz")
       (sha256
        (base32 "1cr3gzcs3flmgnnbj5iz93vh9w0fca1ilzi2q82cl63ln3mwvpz0"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out")
                                           "/bin"))
                    (script (string-append bindir "/edirect.pl")))
               (mkdir-p bindir)
               (copy-file "edirect.pl" script)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Record the build-time PERL5LIB in a wrapper so that
             ;; 'edirect.pl' finds all of its perl inputs at run time.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl"))
                   (perl-lib (getenv "PERL5LIB")))
               (wrap-program script
                 `("PERL5LIB" ":" prefix (,perl-lib)))))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
1288
e4e5a4d8
RW
;; eXpress, built with CMake against the bamtools and boost inputs.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32 "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools-dir (assoc-ref inputs "bamtools")))
               ;; Turn off static boost linking and point the in-tree
               ;; bamtools include directory at the bamtools input.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools-dir "/include/bamtools")))
               ;; Likewise for the in-tree bamtools library directory.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools-dir "/lib/bamtools"))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
1333
f3674b1c
BW
;; Express Beta Diversity: built inside "source", installed by hand.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Build and check from within the "source" subdirectory ...
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; Run the freshly built executable with "-u" as the check.
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         ;; ... and go back to the top level before installing.
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (copy-file "scripts/convertToEBD.py"
                          (string-append bindir "/convertToEBD.py"))
               (copy-file "bin/ExpressBetaDiversity"
                          (string-append bindir "/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
1378
12b04cbe
BW
;; FastTree is distributed as a single C file, compiled here twice: once
;; plain ("FastTree") and once with OpenMP flags ("FastTreeMP").
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.microbesonline.org/fasttree/FastTree-"
                           version ".c"))
       (sha256
        (base32 "0dzqc9vr9iiiw21y159xfjl2z90vw0y7r4x6456pcaxiy5hd2wmi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; The source is a lone .c file: nothing to unpack or configure.
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into PROGRAM, putting EXTRA-FLAGS ahead of
             ;; the flags shared by both builds; true when gcc succeeds.
             (define (compile program . extra-flags)
               (zero? (apply system* "gcc"
                             (append extra-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o" program
                                           source
                                           "-lm")))))
             (and (compile "FastTree")
                  (compile "FastTreeMP" "-DOPENMP" "-fopenmp"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (copy-file "FastTree"
                          (string-append bindir "/FastTree"))
               (copy-file "FastTreeMP"
                          (string-append bindir "/FastTreeMP"))
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
1437
2127cedb
RW
;; FASTX-Toolkit: a standard GNU build using pkg-config and libgtextutils.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
1469
d7678942
RW
;; Flexbar, built with CMake.  There is no 'install phase; the binary
;; directory is passed to CMake via FLEXBAR_BINARY_DIR instead.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/flexbar/"
                           version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32 "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DFLEXBAR_BINARY_DIR="
                            (assoc-ref %outputs "out")
                            "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (delete 'install)
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin:")))
               ;; Put the output's bin directory first on PATH, then run
               ;; the validation script from the source tree's test
               ;; directory.
               (setenv "PATH" (string-append bindir (getenv "PATH")))
               (chdir "../flexbar_v2.5_src/test")
               (zero? (system* "bash" "flexbar_validate.sh"))))))))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
1514
19f4554c
BW
;; FragGeneScan: the training files are moved from bin/train to
;; share/fraggenescan/train and the scripts are patched accordingly.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/fraggenescan/"
                           "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (string-append (assoc-ref outputs "out")))
                    (data-dir (string-append prefix "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Call "rm" and "mv" by the absolute file names found
                 ;; at build time.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; The script and the other programs look for training
                 ;; files in the non-standard location bin/train/XXX;
                 ;; use share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 data-dir
                                 "train/\".$FGS_train_file;")))
               ;; NOTE(review): the leading whitespace in this pattern and
               ;; its replacement must match run_hmm.c exactly -- confirm
               ;; against the upstream source.
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" data-dir "/train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" data-dir "\";"))))
             #t))
         (replace 'build
           (lambda _
             (and (zero? (system* "make" "clean"))
                  (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (string-append (assoc-ref outputs "out")))
                    (bindir (string-append prefix "/bin/"))
                    (train-dir (string-append prefix
                                              "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bindir)
               (install-file "FragGeneScan" bindir)
               (install-file "FGS_gff.py" bindir)
               (install-file "post_process.pl" bindir)
               (copy-recursively "train" train-dir))))
         (delete 'check)
         ;; There is no 'make check'; instead run one of the bundled
         ;; examples with the installed script and verify that the
         ;; expected output files were created.
         (add-after 'install 'post-install-check
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (string-append (assoc-ref outputs "out")))
                    (bindir (string-append prefix "/bin/"))
                    (runner (string-append bindir "run_FragGeneScan.pl")))
               (and (zero? (system* runner
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with Python 3
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
1595
81f3e0c1
BW
;; fxtract, built from a release tarball plus a pinned checkout of the
;; author's "util" repository, which is copied into the "util" directory
;; of the source tree before building.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the release tarball after this package and version.  It
         ;; was previously named after the util checkout, which was
         ;; misleading and dropped the ".tar.gz" suffix.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Replace the "util" directory in the tarball (removed with
           ;; rmdir, so it must be empty) with the pinned checkout.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional
           ;; dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctstennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      (license license:expat))))
1660
5854f685
RW
;; GRIT, built with Python 2; the Cython-generated C files are deleted so
;; they get regenerated from the .pyx sources.
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Removing the shipped C files forces them to be generated
             ;; afresh from the pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (let ((numpy-dir (assoc-ref inputs "python-numpy")))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Pass numpy's header directory as include_dirs to fix
                 ;; compilation.
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-dir
                                 "/lib/python2.7/site-packages/numpy/core/include/"
                                 "']"))))
             #t)))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
1711
346a829a
RW
(define-public hisat
  ;; Package recipe for HISAT.  The sources come as a zip archive, the
  ;; Makefile is patched in a dedicated phase, and the executables are copied
  ;; by a hand-written 'install phase.
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (alist-cons-after
        'unpack 'patch-sources
        (lambda _
          ;; XXX Cannot use snippet because zip files are not supported
          (substitute* "Makefile"
            (("^CC = .*$") "CC = gcc")
            (("^CPP = .*$") "CPP = g++")
            ;; replace BUILD_HOST and BUILD_TIME for deterministic build
            (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
            (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
          ;; Replace the "/usr/bin/env" reference in the wrapper scripts with
          ;; whatever "env" is found by `which'.
          (substitute* '("hisat-build" "hisat-inspect")
            (("/usr/bin/env") (which "env"))))
        (alist-replace
         'install
         (lambda* (#:key outputs #:allow-other-keys)
           ;; Copy every hisat executable found in the build tree into the
           ;; output's "bin" directory.  (This used to say "/bi/", which put
           ;; the programs in a directory that is never on PATH.)
           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
             (for-each (lambda (file)
                         (install-file file bin))
                       (find-files
                        "."
                        "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))))
         ;; There is no configure step; the 'configure phase is dropped.
         (alist-delete 'configure %standard-phases)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
1777
c684629f
BW
(define-public hmmer
  ;; Package recipe for HMMER, built with the stock GNU build system.
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory name is "hmmer" followed by the first
       ;; component of the version as returned by `version-prefix'.
       (uri (string-append
             "http://selab.janelia.org/software/hmmer"
             (version-prefix version 1) "/"
             version "/hmmer-" version ".tar.gz"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.janelia.org")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
1806
85652f59
RW
(define-public htseq
  ;; Package recipe for HTSeq, built against Python 2.
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; only Python 2 is supported
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-pysam" ,python2-pysam)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
1834
15a3c3d4
RW
(define-public htsjdk
  ;; Package recipe for HTSJDK.  The GNU build system is used only as a
  ;; scaffold: 'build runs ant, and 'configure, 'install and 'check are
  ;; removed.
  (package
    (name "htsjdk")
    (version "1.129")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htsjdk/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
       (modules '((guix build utils)))
       ;; remove build dependency on git
       (snippet
        '(substitute* "build.xml"
           (("failifexecutionfails=\"true\"")
            "failifexecutionfails=\"false\"")))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((srfi srfi-1)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:phases
       (alist-replace
        'build
        (lambda _
          ;; ant is told, via -Ddist, to use share/java/htsjdk/ under the
          ;; package output.
          (let ((dist-flag (string-append "-Ddist="
                                          (assoc-ref %outputs "out")
                                          "/share/java/htsjdk/")))
            (setenv "JAVA_HOME" (assoc-ref %build-inputs "jdk"))
            (zero? (system* "ant" "all" dist-flag))))
        ;; `fold' (from srfi-1) deletes each listed phase in turn.
        (fold alist-delete %standard-phases
              '(configure install check)))))
    (native-inputs
     `(("ant" ,ant)
       ("jdk" ,icedtea "jdk")))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also an number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
1879
e7c09730
RW
(define-public htslib
  ;; Package recipe for HTSlib, using the GNU build system with one extra
  ;; phase that patches the test driver.
  (package
    (name "htslib")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; Point the "/bin/bash" reference in the test script at the
             ;; bash found by `which'.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
1914
c4325f62
RW
(define-public idr
  ;; Package recipe for idr.  After installation the "idr" executable is
  ;; wrapped so that PYTHONPATH includes the site-packages directories of
  ;; scipy, numpy and matplotlib.
  (package
    (name "idr")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/nboley/idr/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Last five characters of the "python" input's path,
                    ;; truncated to their first three characters.
                    (py-ver (string-take
                             (string-take-right
                              (assoc-ref inputs "python") 5)
                             3))
                    ;; site-packages directory of the input named LABEL
                    (site-dir (lambda (label)
                                (string-append (assoc-ref inputs label)
                                               "/lib/python" py-ver
                                               "/site-packages")))
                    (path (string-join
                           (map site-dir
                                '("python-scipy"
                                  "python-numpy"
                                  "python-matplotlib"))
                           ":")))
               (wrap-program (string-append out "/bin/idr")
                 `("PYTHONPATH" ":" prefix (,path))))
             #t)))))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl3+)))
1964
43c565d2
RW
(define-public jellyfish
  ;; Package recipe for Jellyfish with three outputs: the default one plus
  ;; separate outputs for the Ruby and Python bindings.
  (package
    (name "jellyfish")
    (version "2.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Each binding is enabled with its own output as the argument.
       (list (string-append "--enable-ruby-binding="
                            (assoc-ref %outputs "ruby"))
             (string-append "--enable-python-binding="
                            (assoc-ref %outputs "python")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; The combined work is published under the GPLv3 or later. Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
2013
d57e6d0f
RW
(define-public macs
  ;; Package recipe for MACS, fetched from PyPI under the name "MACS2" and
  ;; built with Python 2; tests are disabled.
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32
         "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only compatible with Python 2.7
       #:tests? #f))                    ; no test target
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
2042
41ddebdd
BW
(define-public mafft
  ;; Package recipe for MAFFT.  The build happens inside the "core"
  ;; sub-directory; the Makefile is patched to drop a few programs and
  ;; scripts, and tool names in two source files are replaced with the
  ;; paths found by `which'.
  (package
    (name "mafft")
    (version "7.267")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "1xl6xq1rfxkws0svrlhyqxhhwbv6r77jwblsdpcyiwzsscw6wlk0"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name
               ;; "f2cl", the same program kept by the replacement; it used
               ;; to read "2cl", which does not correspond to the text the
               ;; replacement restores.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; Comment out the clean-up commands for the removed
               ;; programs.  Both lines become a bare "#"; the second one
               ;; used to be replaced by ")#", leaving a stray parenthesis
               ;; in the Makefile.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Substitute the paths returned by `which' for the tool names
             ;; appearing in these two files.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure))))
    (inputs
     `(("perl" ,perl)
       ("gawk" ,gawk)
       ("grep" ,grep)))
    (propagated-inputs
     `(("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
8fd790eb
BW
2117
(define-public metabat
  ;; Package recipe for MetaBAT.  The project is driven by scons, so the
  ;; 'build phase is replaced with a single "scons ... install" invocation
  ;; and the 'configure, 'check and 'install phases are removed.
  (package
    (name "metabat")
    (version "0.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://bitbucket.org/berkeleylab/metabat/get/"
             version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0vgrhbaxg4dkxyax2kbigak7w0arhqvw0szwp6gd9wmyilc44kfa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; Rewrite references to headers under "bam/" so that they
             ;; point to "samtools/" instead.
             (substitute* "SConstruct"
               (("/include/bam/bam.h")
                "/include/samtools/bam.h"))
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"samtools/kseq.h\""))
             #t))
         (add-after 'unpack 'fix-scons
           (lambda _
             ;; Do not distribute README
             (substitute* "SConstruct"
               (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)")
                ""))
             #t))
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; The output directory is created before scons runs.
               (mkdir out)
               (zero? (system* "scons"
                               (string-append "PREFIX=" out)
                               (string-append "HTSLIB_DIR="
                                              (assoc-ref inputs "htslib"))
                               (string-append "SAMTOOLS_DIR="
                                              (assoc-ref inputs "samtools"))
                               (string-append "BOOST_ROOT="
                                              (assoc-ref inputs "boost"))
                               "install")))))
         ;; check and install carried out during build phase
         (delete 'check)
         (delete 'install))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (native-inputs
     `(("scons" ,scons)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
8fd790eb 2195
ddd82e0e
RW
(define-public miso
  ;; Package recipe for MISO (published on PyPI as "misopy"), built with
  ;; Python 2.  A source snippet rewrites setup.py before the build.
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Use setuptools, or else the executables are not
           ;; installed.
           (("distutils.core") "setuptools")
           ;; use "gcc" instead of "cc" for compilation
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)    ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
2245
1e44cf8b
BW
(define-public orfm
  ;; Package recipe for OrfM, built with the stock GNU build system.
  (package
    (name "orfm")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "0vb6d771gl4mix8bwx919x5ayy9pkj44n7ki336nz3rz2rx4c7gk"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
2270
19ee9201
RW
(define-public python2-pbcore
  ;; Package recipe for pbcore, built with Python 2.
  (package
    (name "python2-pbcore")
    (version "0.9.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/PacificBiosciences/pbcore/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1z46rwjac93jm87cbj2zgjg6qvsgs65140wkbbxsvxps7ai4pm09"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
2299
c61fe02c
RW
(define-public python2-warpedlmm
  ;; Package recipe for WarpedLMM, built with Python 2 from a zip archive.
  ;; The installed "bin" directory is deleted after the 'install phase.
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'remove-bin-directory
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The "bin" directory only contains wrappers for running
             ;; the module tests. They are not needed after the
             ;; "check" phase.
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin")))
               (delete-file-recursively bin-dir))
             #t)))))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)
       ("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
2346
(define-public pbtranscript-tofu
  ;; Package recipe for pbtranscript-tofu, fetched from git at a fixed
  ;; commit.  The commit id is appended to the version string and the build
  ;; runs inside the "pbtranscript-tofu/pbtranscript/" sub-directory.
  (let ((commit "8f5467fe6"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer.git")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; remove bundled Cython sources
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; With standard flags, the install phase attempts to create a zip'd
         ;; egg file, and fails with an error: 'ZIP does not support timestamps
         ;; before 1980'
         #:configure-flags '("--single-version-externally-managed"
                             "--record=pbtranscript-tofu.txt")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; With setuptools version 18.0 and later this setup.py hack causes
           ;; a build error, so we disable it.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)
         ("python-setuptools" ,python2-setuptools)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
2406
af860475
BW
(define-public prodigal
  ;; Package recipe for Prodigal.  There is no configure step; the install
  ;; location is passed to make through INSTALLDIR.
  (package
    (name "prodigal")
    (version "2.6.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/hyattpd/Prodigal/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "17srxkqd3jc77xk15pfbgg1a9xahqg7337w95mrsia7mpza4l2c9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags
       ;; INSTALLDIR is the "bin" directory of the package output.
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
2437
66e3eff1
RW
(define-public rsem
  ;; Package recipe for RSEM.  The 'configure phase is replaced by a
  ;; Makefile edit, 'install is done by hand, and a few of the installed
  ;; programs are wrapped so that PERL5LIB includes the package's own Perl
  ;; module directory.
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
             version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (list (search-patch "rsem-makefile.patch")))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of boost
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (string-append (assoc-ref outputs "out")))
                    (bin-dir (string-append out "/bin/"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin-dir)
               (mkdir-p perl-dir)
               ;; Copy everything that `find-files' matches against
               ;; "rsem-.*" into the "bin" directory.
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bin-dir
                                                     (basename program))))
                         (find-files "." "rsem-.*"))
               (copy-file "rsem_perl_utils.pm"
                          (string-append perl-dir "/rsem_perl_utils.pm")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Each listed program gets PERL5LIB prefixed with the
               ;; directory holding rsem_perl_utils.pm.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/bin/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append out "/lib/perl5/site_perl")))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r" ,r)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
2515
8622a072
RW
(define-public rseqc
  ;; Package recipe for RSeQC, built with Python 2.  A source snippet
  ;; removes the bundled pysam copy and adjusts setup.py accordingly.
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           version "/RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; remove dependency on outdated "distribute" module
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; do not use bundled copy of pysam
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
2558
4e10a221
RW
(define-public samtools
  ;; Package recipe for samtools.  On top of the standard phases it patches
  ;; the test script after 'unpack and, after 'install, additionally
  ;; installs libbam.a and the top-level header files.
  (package
    (name "samtools")
    (version "1.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "03mnf0mhbfwhqlqfslrhfnw68s3g0fs1as354i9a584mqw1l1smy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (alist-cons-after
        'unpack 'patch-tests
        (lambda _
          (substitute* "test/test.pl"
            ;; The test script calls out to /bin/bash
            (("/bin/bash") (which "bash")))
          #t)
        (alist-cons-after
         'install 'install-library
         (lambda* (#:key outputs #:allow-other-keys)
           ;; Copy the static library into the output's "lib" directory.
           (let ((libdir (string-append (assoc-ref outputs "out") "/lib")))
             (install-file "libbam.a" libdir)))
         (alist-cons-after
          'install 'install-headers
          (lambda* (#:key outputs #:allow-other-keys)
            (let ((header-dir (string-append (assoc-ref outputs "out")
                                             "/include/samtools/")))
              ;; `scandir' lists the entries of the current directory whose
              ;; names match "\\.h$".
              (for-each (lambda (header)
                          (install-file header header-dir))
                        (scandir "." (lambda (entry)
                                       (string-match "\\.h$" entry))))
              #t))
          %standard-phases)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda 2616
0b84a0aa
RW
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools. The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     ;; Start from the arguments of 'samtools' and adjust the make flags and
     ;; phases for this release.
     `(#:tests? #f ;no "check" target
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:make-flags flags)
            `(cons "LIBCURSES=-lncurses" ,flags))
           ((#:phases phases)
            `(modify-phases ,phases
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   ;; Only the "samtools" executable is installed by this
                   ;; phase.
                   (let ((bin (string-append
                               (assoc-ref outputs "out") "/bin")))
                     ;; 'install-file' creates BIN as needed and copies the
                     ;; executable to BIN/samtools.
                     (install-file "samtools" bin)
                     ;; Return an explicit true value rather than the
                     ;; unspecified result of the copy.
                     #t)))
               (delete 'patch-tests)
               (delete 'configure))))))))
0b84a0aa 2647
fe4c37c2
RW
(define-public mosaik
  ;; The package is built from a fixed upstream commit.
  (let ((commit "5c25216d"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source (origin
                ;; There are no release tarballs nor tags.
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/wanpinglee/MOSAIK.git")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, just enter "src".
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; Copy everything from "../bin" (relative to "src") into the
           ;; "bin" directory of the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (target (string-append out "/bin")))
                 (mkdir-p target)
                 (copy-recursively "../bin" target)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://code.google.com/p/mosaik-aligner/")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
2694
282c5087
RW
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.2.3")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "15074fdi94c6pjy83hhk22r86kfvzpaz2i07h3rqg9yy6x3w0pk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "ngs-sdk" sub-directory of the unpacked source.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "ngs-sdk")
             #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install', so it is invoked by hand with just
             ;; the build and installation prefixes.
             (let* ((out (assoc-ref outputs "out"))
                    (build-prefix (string-append "--build-prefix="
                                                 (getcwd) "/build"))
                    (prefix (string-append "--prefix=" out)))
               (zero? (system* "./configure" build-prefix prefix))))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;; unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
2740
2651a5e6
RW
(define-public ngs-java
  ;; Same source and build recipe as 'ngs-sdk', except that the build enters
  ;; the "ngs-java" sub-directory.
  (package (inherit ngs-sdk)
    (name "ngs-java")
    (arguments
     ;; Prepend a #:modules setting to the arguments of 'ngs-sdk' and
     ;; override its 'enter-dir' phase.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
2758
75dd2424
RW
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.5.7")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hay5hy8ynva3mi5wbn4wmq1q23qwxc3aqzbb86hg3x4f1r73270"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-ngs-java-prefix="
                                      (assoc-ref inputs "ngs-java"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))))))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Install interface libraries. On i686 the interface libraries
               ;; are installed to "linux/gcc/i386", so we need to use the Linux
               ;; architecture name ("i386") instead of the target system prefix
               ;; ("i686").
               (mkdir (string-append out "/ilib"))
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 (string-append out "/ilib"))
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               ;; Return an explicit true value instead of the unspecified
               ;; result of 'copy-recursively'.
               #t))))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("ngs-java" ,ngs-java)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
2844
cc6ed477
RW
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (list (search-patch "plink-1.07-unclobber-i.patch")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the "plink" executable into the output's "bin/".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (destination (string-append out "/bin/")))
               (install-file "plink" destination)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
2896
c6a24d6e
RW
(define-public smithlab-cpp
  ;; Built from a fixed upstream commit; the version string is derived from
  ;; REVISION and COMMIT.
  (let ((revision "1")
        (commit "728a097"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp.git")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       ;; (srfi srfi-26) provides 'cut', used in the 'install' phase.
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; Refer to "sam.h" through the "samtools/" include
               ;; sub-directory.  The dot is escaped so that only the literal
               ;; file name "sam.h" is rewritten.
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Install all object files to "lib" and all ".hpp" headers to
               ;; "include/smithlab-cpp".
               (let* ((out (assoc-ref outputs "out"))
                      (lib (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
2949
56e373ef
RW
(define-public preseq
  (package
    (name "preseq")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/smithlabcode/"
                                  "preseq/archive/v" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "08r684l50pnxjpvmhzjgqq56yv9rfw90k8vx0nsrnrzk8mf9hsdq"))
              (modules '((guix build utils)))
              (snippet
               ;; Remove bundled samtools.
               '(delete-file-recursively "samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       ;; Point the Makefile at the samtools static library and at the
       ;; smithlab-cpp inputs instead of bundled copies.
       (let ((out (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
2998
51c64999
RW
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.5.7")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0q93qg744x787d08qmjmdafki1wkbvkdwynayjnjd454gkd26jl5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources and
             ;; raw build output of ncbi-vdb, including files that are not
             ;; installed. Since we are building against an installed version of
             ;; ncbi-vdb, the following modifications are needed.
             ;;
             ;; NOTE(review): the first pattern is anchored on leading
             ;; whitespace, so it must match the indentation used in
             ;; konfigure.perl exactly -- confirm against the upstream file.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (zero? (system*
                     "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     ;; (string-append "--with-xml2-prefix="
                     ;; (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5")))))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
3073
d3517eda
RW
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         ;; Unpack the tarball by hand, then copy the headers to "out" and
         ;; the "share" directory to "doc".
         (let* ((source (assoc-ref %build-inputs "source"))
                (tar-dir (assoc-ref %build-inputs "tar"))
                (bzip-dir (assoc-ref %build-inputs "bzip2"))
                (out (assoc-ref %outputs "out"))
                (doc (assoc-ref %outputs "doc")))
           ;; tar needs bzip2 on PATH to decompress the archive.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip-dir "/bin"))
           (system* "tar" "xvf" source)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5 3116
d708b7a9
BW
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/s/seqmagick/seqmagick-"
             version ".tar.gz"))
       (sha256
        (base32
         "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
    (build-system python-build-system)
    (arguments
     ;; python2 only, see https://github.com/fhcrc/seqmagick/issues/56
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Current test in setup.py does not work as of 0.6.1,
         ;; so use nose to run tests instead for now. See
         ;; https://github.com/fhcrc/seqmagick/issues/55
         (replace 'check
           (lambda _
             ;; The phase succeeds only when "nosetests" exits with status 0.
             (let ((status (system* "nosetests")))
               (zero? status)))))))
    (inputs
     `(("python-biopython" ,python2-biopython)))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)
       ("python-nose" ,python2-nose)))
    (home-page "http://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
3154
5f7e17be
BW
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/amplab/snap/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vnsjwv007k1fl1q7d681kbwn6bc66cgw6h16hym6gvyy71qv2ly"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the "unit_tests" program from the build directory; the phase
         ;; succeeds only when it exits with status 0.
         (replace 'check (lambda _ (zero? (system* "./unit_tests"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; 'install-file' creates BIN as needed, so no separate
               ;; 'mkdir-p' is required.
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library the program is built against rather than a
    ;; build-time tool, so it is declared as a regular input.
    ;; NOTE(review): presumably linked by the upstream Makefile -- confirm.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    (license license:asl2.0)))
3191
bcadaf00
BW
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/biocore/sortmerna/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1mc5cf1c7xh0h7xb11vh7gqgzx0qvrfa606cb8ixlfg3f2av58s4"))))
    (build-system gnu-build-system)
    (outputs '("out" ;for binaries
               "db")) ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The two executables go to "out"; the FASTA files found under
             ;; "rRNA_databases" go to the separate "db" output.
             (let* ((bin-dir (string-append (assoc-ref outputs "out") "/bin"))
                    (db-dir (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases"))
                    (databases (find-files "rRNA_databases" ".*fasta")))
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         '("sortmerna" "indexdb_rna"))
               (for-each (lambda (database)
                           (install-file database db-dir))
                         databases)
               #t))))))
    (home-page "http://bioinfo.lifl.fr/RNA/sortmerna")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    (license license:lgpl3)))
3234
ce7155d5
RW
(define-public star
  (package
    (name "star")
    (version "2.5.1b")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/alexdobin/STAR/archive/"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wzcfhkg10apnh0y73xlarfa79xxwxdizicbdl11wb48awk44iq4"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Call "rm" by name rather than by its absolute path.
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite from the Depend.list rule,
             ;; since the bundled sources were removed in the snippet.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Include htslib headers from the system include path.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot of the ".h" suffix is escaped here as well, so that
             ;; this pattern is consistent with the one above and matches
             ;; header names only.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("vim" ,vim))) ; for xxd
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
de07c0db 3305
dbf4ed7c
RW
(define-public subread
  (package
    (name "subread")
    (version "1.4.6-p2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/subread/subread-"
                    version "-source.tar.gz"))
              (sha256
               (base32
                "06sv9mpcsdj6p68y15d6gi70lca3lxmzk0dn61hg0kfsa7rxmsr3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let ((system ,(or (%current-target-system)
                                (%current-system)))
                   (flags '("-ggdb" "-fomit-frame-pointer"
                            "-ffast-math" "-funroll-loops"
                            "-fmessage-length=0"
                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
                            "-DMAKE_STANDALONE"
                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
               ;; Only the list of flags depends on the system; the
               ;; "CCFLAGS=" prefix is shared by both cases.
               (string-append "CCFLAGS="
                              (string-join
                               (if (string-prefix? "x86_64" system)
                                   (append flags flags64)
                                   flags))))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy everything from "../bin" (relative to "src") into the
             ;; "bin" directory of the output.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin)
               ;; Return an explicit true value instead of the unspecified
               ;; result of 'copy-recursively'.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
3361
d15d981e
RW
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                  "stringtie-" version ".tar.gz"))
              (sha256
               (base32
                "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
              (modules '((guix build utils)))
              (snippet
               ;; Remove the bundled copy of samtools.
               '(begin
                  (delete-file-recursively "samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Drop the Makefile prerequisite on the bundled libbam.a,
             ;; which was removed in the snippet.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the "samtools/" include
             ;; sub-directory.  The dot of the ".h" suffix is escaped so
             ;; that only literal header names match.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring)\\.h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "stringtie" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
3416
de07c0db
RW
;; VCFtools.  There is no configure script; the installation prefix and
;; the man page directory are passed to make through #:make-flags.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.12b")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/vcftools/vcftools_"
                    version ".tar.gz"))
              (sha256
               (base32
                "148al9h7f8g8my2qdnpax51kdd2yjrivlx6frvakf4lz5r8j88wx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags (list
                     "CFLAGS=-O2"       ; override "-m64" flag
                     (string-append "PREFIX=" (assoc-ref %outputs "out"))
                     (string-append "MANDIR=" (assoc-ref %outputs "out")
                                    "/share/man/man1"))
       #:phases
       ;; Use 'modify-phases' like the other packages in this file instead
       ;; of the lower-level alist procedures.
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-manpage-install
           (lambda _
             ;; Copy the man page from the source tree rather than from a
             ;; path below ${PREFIX}.
             (substitute* "Makefile"
               (("cp \\$\\{PREFIX\\}/cpp/vcftools.1") "cp ./cpp/vcftools.1"))
             ;; Report success explicitly.
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://vcftools.sourceforge.net/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at http://vcftools.sourceforge.net/license.html
    (license license:lgpl3)))
9c38b540 3457
a2950fa4
BW
;; VSEARCH.  The source snippet removes the bundled cityhash sources and
;; edits the build files so that the cityhash input is linked instead.
(define-public vsearch
  (package
    (name "vsearch")
    (version "1.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/torognes/vsearch/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1i3bad7gnn2y3a1yfixzshd99xdkjc8w5bxzgifpysc6jiljwvb5"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled cityhash and '-mtune=native'.
           (substitute* "src/Makefile.am"
             (("^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash \
-O3 -mtune=native -Wall -Wsign-compare")
              (string-append "AM_CXXFLAGS=-lcityhash"
                             " -O3 -Wall -Wsign-compare"))
             (("^__top_builddir__bin_vsearch_SOURCES = city.h \\\\")
              "__top_builddir__bin_vsearch_SOURCES = \\")
             (("^city.h \\\\") "\\")
             (("^citycrc.h \\\\") "\\")
             (("^libcityhash_a.*") "")
             (("noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a \
libcityhash.a")
              "noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a")
             (("__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
libcpu_sse2.a libcityhash.a")
              "__top_builddir__bin_vsearch_LDADD = libcpu_ssse3.a \
libcpu_sse2.a -lcityhash"))
           ;; Include the cityhash headers with angle brackets now that
           ;; the local copies are gone.
           (substitute* "src/vsearch.h"
             (("^\\#include \"city.h\"") "#include <city.h>")
             (("^\\#include \"citycrc.h\"") "#include <citycrc.h>"))
           (delete-file "src/city.h")
           (delete-file "src/citycrc.h")
           (delete-file "src/city.cc")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run autoreconf before the 'configure' phase.
         (add-before 'configure 'autogen
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
3527
9c38b540
PP
;; bio-locus, fetched from rubygems and built with the Ruby build system.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
edb15985 3551
b2bddb07
PP
;; bio-blastxmlparser, fetched from rubygems and built with the Ruby
;; build system.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is a test framework, so it is a native input, as in
    ;; 'bio-locus'.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "http://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
3575
edb15985
PP
;; BioRuby, fetched from rubygems under the gem name "bio".
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.0")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "01k2fyjl5fpx4zn8g6gqiqvsg2j1fgixrs9p03vzxckynxdq3wmc"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))  ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; Replace the tool names hard-coded in the test files with
             ;; the absolute file names found on PATH.  The clauses are
             ;; applied to each line in the order written.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; Note: no comma between the list elements; a comma would be
             ;; read as 'unquote'.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
a5002ae7 3618
9fba89e8
RW
;; ACSNMineR, fetched from CRAN; the upstream name is recorded in the
;; package properties.
(define-public r-acsnminer
  (package
    (name "r-acsnminer")
    (version "0.15.11")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "ACSNMineR" version))
       (sha256
        (base32
         "1dl4drhjyazwm9wxlm8yfppwvvj4h6jxwmz8kfw5bxpb3jdnsqvy"))))
    (properties '((upstream-name . "ACSNMineR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)))
    (home-page "http://cran.r-project.org/web/packages/ACSNMineR")
    (synopsis "Gene enrichment analysis")
    (description
     "This package provides tools to compute and represent gene set enrichment
or depletion from your data based on pre-saved maps from the @dfn{Atlas of
Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
enrichment can be run with hypergeometric test or Fisher exact test, and can
use multiple corrections. Visualization of data can be done either by
barplots or heatmaps.")
    (license license:gpl2+)))
3644
d29b25c4
RW
;; BiocGenerics, fetched from Bioconductor.  It declares no inputs.
(define-public r-biocgenerics
  (package
    (name "r-biocgenerics")
    (version "0.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocGenerics" version))
       (sha256
        (base32
         "0f16ryy5f012hvksrwlmm33bcl7lw97i2jvhbnwfwl03j4w7nhc1"))))
    (properties
     '((upstream-name . "BiocGenerics")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (home-page "http://bioconductor.org/packages/BiocGenerics")
    (synopsis "S4 generic functions for Bioconductor")
    (description
     "This package provides S4 generic functions needed by many Bioconductor
packages.")
    (license license:artistic2.0)))
3665
7485129e
RW
;; S4Vectors, fetched from Bioconductor.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.8.5")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32
         "10f4jxwlwsiy7zhb3kgp6anid0d7wkvrrljl80r3nhx38yr24l5k"))))
    (properties
     '((upstream-name . "S4Vectors")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
3693
78addcb0
RW
;; IRanges, fetched from Bioconductor.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32
         "00x0266sys1fc5ipa639y84p6m6mgspk2xb099vcwmd3w4hypj9d"))))
    (properties
     '((upstream-name . "IRanges")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
3723
bf7764b7
RW
;; GenomeInfoDb, fetched from Bioconductor.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32
         "1j2n1v1mrw1fxn7cyffz112pm76wd6gy9q9qwlsfv3brbsqbvdbf"))))
    (properties
     '((upstream-name . "GenomeInfoDb")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
3750
0e7d5560
RW
;; XVector, fetched from Bioconductor.  The references to the zlibbioc
;; package are stripped and zlib is supplied as an input.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "0havwyr6xqk7w0rmbwfj9jq1djz7wzdz7w39adhklwzwz9l4ih3a"))))
    (properties
     `((upstream-name . "XVector")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove zlibbioc from the package metadata and from the
             ;; namespace imports.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/XVector")
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
3787
e2cd1d0f
RW
;; GenomicRanges, fetched from Bioconductor.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.22.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "1jffvcs0jsi7q4l3pvjj6r73vll80csgkljvhqp0g2ixc43jjng9"))))
    (properties
     `((upstream-name . "GenomicRanges")
       (r-repository . bioconductor)))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, a task that plays a central role when
analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
3815
555e3399
RW
;; Biobase, fetched from Bioconductor.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32
         "1qasjpq3kw8h7qw8cin3bjvv1256hqr1mm24fq3v0ymxzlb66szi"))))
    (properties
     '((upstream-name . "Biobase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
3837
8b7bce74
RW
;; AnnotationDbi, fetched from Bioconductor.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.32.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32
         "1v6x62hgys5827yg2xayjrd9xawbayzm6wy0q4vxh1s6yxc9bklj"))))
    (properties
     '((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
3864
c465fa72
RW
;; biomaRt, fetched from Bioconductor.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.26.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "1s709055abj2gd35g6nnk5d2ai5ii09iir270l2xika6pi62gj3f"))))
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)))
    (home-page "http://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the displayed text second.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
3894
e91d362e
RW
;; BiocParallel, fetched from Bioconductor.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.4.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32
         "1f5mndx66vampcsq0n66afg6x851crl0h3nyv2nyp9bsgzj9cdzq"))))
    (properties
     '((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)))
    (home-page "http://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
3918
bf159353
RW
;; Biostrings, fetched from Bioconductor.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.38.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32
         "1afp9szc8ci6jn0m3hrrqh6df65cpw3v1dcnl6xir3d3m3lwwmk4"))))
    (properties
     '((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
3944
f8d74f70
RW
;; Rsamtools, fetched from Bioconductor.  The references to the zlibbioc
;; package are stripped and zlib is supplied as an input.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32
         "1yc3nzzms3igjwr4l9yd3wdac95glcs08b4cfp7disyly0wcskjd"))))
    (properties
     '((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove zlibbioc from the package metadata and from the
             ;; namespace imports.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the 'samtools', 'bcftools', and
'tabix' utilities for manipulating SAM (Sequence Alignment / Map), FASTA,
binary variant call (BCF) and compressed indexed tab-delimited (tabix)
files.")
    (license license:expat)))
3988
6e76dda2
RW
;; SummarizedExperiment, fetched from Bioconductor.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32
         "0w1dwp99p6i7sc3cn0ir3dr8ksgxwjf16675h5i8n6gbv4rl9lz6"))))
    (properties
     '((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "http://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
4017
d8a828af
RW
;; GenomicAlignments, fetched from Bioconductor.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32
         "03pxzkmwcpl0d7a09ahan0nllfv7qw2i7w361w6af2s4n3xwrniz"))))
    (properties
     '((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "http://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
4050
317755ff
RW
;; rtracklayer, fetched from Bioconductor.  The references to the
;; zlibbioc package are stripped and zlib is supplied as an input.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32
         "1if31hg56islx5vwydpgs5gkyas26kyvv2ljv1c7jikpm62w14qv"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Remove zlibbioc from the package metadata and from the
             ;; namespace imports.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
4095
2fd7c049
RW
;; GenomicFeatures, fetched from Bioconductor.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.22.7")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32
         "1jb4s49ar5j9qslpd3kfdg2wrl4q7ciysd55h9a7zvspymxcngq8"))))
    (properties
     '((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
4136
fb25d880
RW
;; GO.db, fetched through 'bioconductor-uri'.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GO.db" version))
       (sha256
        (base32
         "00gariag9ampz82dh0xllrc26r85d7vdcwc0vca5zdy147rwxr7f"))))
    (properties
     '((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "http://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
4158
d1dbde6a
RW
;; graph, fetched from Bioconductor.
(define-public r-graph
  (package
    (name "r-graph")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "graph" version))
       (sha256
        (base32
         "16w75rji3kv24gfv44w66y1a2y75ax26rl470y3ypna0ndc3rrcd"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "http://bioconductor.org/packages/graph")
    (synopsis "Handle graph data structures in R")
    (description
     "This package implements some simple graph handling capabilities for R.")
    (license license:artistic2.0)))
4177
d547ce5e
RW
;; topGO, fetched from Bioconductor.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32
         "029j9nb39b8l9xlzsp83pmjr8ap247aia387yzaa1yyw8klapdaf"))))
    (properties
     '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-sparsem" ,r-sparsem)))
    (home-page "http://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
4207
c63cef66
RW
;; BSgenome, fetched from Bioconductor.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32
         "130w0m6q8kkca7gyz1aqj5jjhalwvwi6rk2yvbjrnj4gpnncyrd2"))))
    (properties
     '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "http://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
4237
c43a011d
RW
;; impute, fetched from Bioconductor.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.44.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "impute" version))
              (sha256
               (base32
                "0y4x5jk7gsf4xn56jrkdcdnxpcfll4h6ivncd7n4snmzixldvmvw"))))
    (build-system r-build-system)
    ;; gfortran is a compiler, i.e. a tool run at build time, so it is
    ;; declared as a native input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "http://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
4257
03ea5a35
RW
;; seqPattern, fetched from Bioconductor.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32
         "0p9zj6bic7sa0hb2bjm988kkk5n9r1kvlbqkzvy702f642n0j53i"))))
    (properties
     '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-plotrix" ,r-plotrix)))
    (home-page "http://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
4283
cb933df6
RW
;; Bioconductor's genomation, built with the R build system.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1mzs995snwim13qk9kz4q3nczpnbsy1allwp4whfq0cflg2mndfr"))))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-seqpattern" ,r-seqpattern)))
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (license license:artistic2.0)))
4326
486da491
RW
;; Bioconductor annotation data package org.Hs.eg.db (human).
(define-public r-org-hs-eg-db
  (package
    (name "r-org-hs-eg-db")
    (version "3.2.3")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used; spell the URL out.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Hs.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "0xicgkbh6xkvs74s1piafqac63dyz2ycdyil4pj4ghhxx2sabm6p"))))
    (build-system r-build-system)
    ;; Upstream's name uses dots and capitals, unlike the package name.
    (properties `((upstream-name . "org.Hs.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Human")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the human genome.")
    (home-page "http://www.bioconductor.org/packages/org.Hs.eg.db/")
    (license license:artistic2.0)))
4352
fefedf98
RW
;; Bioconductor annotation data package org.Ce.eg.db (worm).
(define-public r-org-ce-eg-db
  (package
    (name "r-org-ce-eg-db")
    (version "3.2.3")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used; spell the URL out.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Ce.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "1d0lx00ybq34yqs6mziaa0lrh77xm0ggsmi76g6k95f77gi7m1sw"))))
    (build-system r-build-system)
    ;; Upstream's name uses dots and capitals, unlike the package name.
    (properties `((upstream-name . "org.Ce.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Worm")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model worm Caenorhabditis elegans.")
    (home-page "http://www.bioconductor.org/packages/org.Ce.eg.db/")
    (license license:artistic2.0)))
4378
16c53a1e
RW
;; Bioconductor annotation data package org.Dm.eg.db (fly).
(define-public r-org-dm-eg-db
  (package
    (name "r-org-dm-eg-db")
    (version "3.2.3")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used; spell the URL out.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Dm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "0mib46c7nr00l7mh290n383za9hyl91a1dc6jhjbk884jmxaxyz6"))))
    (build-system r-build-system)
    ;; Upstream's name uses dots and capitals, unlike the package name.
    (properties `((upstream-name . "org.Dm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Fly")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model fruit fly Drosophila melanogaster.")
    (home-page "http://www.bioconductor.org/packages/org.Dm.eg.db/")
    (license license:artistic2.0)))
4404
e761beb9
RW
;; Bioconductor annotation data package org.Mm.eg.db (mouse).
(define-public r-org-mm-eg-db
  (package
    (name "r-org-mm-eg-db")
    (version "3.2.3")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is published under "data/annotation/" rather than
       ;; "bioc/", so bioconductor-uri cannot be used; spell the URL out.
       (uri (string-append "http://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/"
                           "org.Mm.eg.db_" version ".tar.gz"))
       (sha256
        (base32 "0wh1pm3npdg7070875kfgiid3bqkz3q7rq6snhk6bxfvph00298y"))))
    (build-system r-build-system)
    ;; Upstream's name uses dots and capitals, unlike the package name.
    (properties `((upstream-name . "org.Mm.eg.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (synopsis "Genome wide annotation for Mouse")
    (description
     "This package provides mappings from Entrez gene identifiers to various
annotations for the genome of the model mouse Mus musculus.")
    (home-page "http://www.bioconductor.org/packages/org.Mm.eg.db/")
    (license license:artistic2.0)))
4430
a5002ae7
AE
;; R/qtl, fetched from the CRAN mirror and built with the R build system.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.38-4")
    (source
     (origin
       (method url-fetch)
       ;; The source tarball is named "qtl_<version>.tar.gz" on CRAN.
       (uri (string-append "mirror://cran/src/contrib/qtl_"
                           version ".tar.gz"))
       (sha256
        (base32 "0rv9xhp8lyldpgwxqirhyjqvg07dr5x4x1x2jpyj37dada9ccyx3"))))
    (build-system r-build-system)
    (synopsis "R package for analyzing QTL experiments in genetics")
    (description
     "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
identify genotyping errors, and to perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (home-page "http://rqtl.org/")
    (license license:gpl3)))