Revert "gnu: Add bio-locus."
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015 Ben Woodcroft <donttrustben@gmail.com>
4 ;;;
5 ;;; This file is part of GNU Guix.
6 ;;;
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
11 ;;;
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
16 ;;;
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19
20 (define-module (gnu packages bioinformatics)
21 #:use-module ((guix licenses) #:prefix license:)
22 #:use-module (guix packages)
23 #:use-module (guix utils)
24 #:use-module (guix download)
25 #:use-module (guix git-download)
26 #:use-module (guix build-system gnu)
27 #:use-module (guix build-system cmake)
28 #:use-module (guix build-system perl)
29 #:use-module (guix build-system python)
30 #:use-module (guix build-system trivial)
31 #:use-module (gnu packages)
32 #:use-module (gnu packages algebra)
33 #:use-module (gnu packages base)
34 #:use-module (gnu packages boost)
35 #:use-module (gnu packages compression)
36 #:use-module (gnu packages cpio)
37 #:use-module (gnu packages file)
38 #:use-module (gnu packages java)
39 #:use-module (gnu packages linux)
40 #:use-module (gnu packages machine-learning)
41 #:use-module (gnu packages maths)
42 #:use-module (gnu packages ncurses)
43 #:use-module (gnu packages perl)
44 #:use-module (gnu packages pkg-config)
45 #:use-module (gnu packages popt)
46 #:use-module (gnu packages protobuf)
47 #:use-module (gnu packages python)
48 #:use-module (gnu packages statistics)
49 #:use-module (gnu packages tbb)
50 #:use-module (gnu packages textutils)
51 #:use-module (gnu packages vim)
52 #:use-module (gnu packages web)
53 #:use-module (gnu packages xml)
54 #:use-module (gnu packages zip)
55 #:use-module (srfi srfi-1))
56
;; ARAGORN package.  The configure phase is dropped and the program is
;; compiled by hand from the single versioned C file found in the tarball;
;; the binary and its man page are then copied into the output.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.36")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32
         "1dg7jlz1qpqy88igjxd6ncs11ccsirb36qv1z01a0np4i4jh61mb"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The source file name embeds the package version.
             (let ((source-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc" "-O3" "-ffast-math" "-finline-functions"
                               "-o" "aragorn" source-file)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out-dir (assoc-ref outputs "out"))
                    (bindir  (string-append out-dir "/bin"))
                    (mandir  (string-append out-dir "/share/man/man1")))
               (mkdir-p bindir)
               (copy-file "aragorn" (string-append bindir "/aragorn"))
               (mkdir-p mandir)
               (copy-file "aragorn.1" (string-append mandir "/aragorn.1")))
             ;; Phases must return a true value on success.
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
104
;; BamTools package, built with CMake.  LDFLAGS is set before configuring so
;; that an rpath pointing at the output's "lib/bamtools" directory is passed
;; to the linker.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/pezmaster31/bamtools/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib/bamtools")))
               (setenv "LDFLAGS" (string-append "-Wl,-rpath=" libdir))))))))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
137
;; BEDOPS package.  There is no configure phase; a custom phase unpacks the
;; bundled third-party tarballs right after the main unpack so that they are
;; patched like the rest of the source tree, and the Makefile is edited so it
;; does not unpack them again.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; result of the extraction is the first operand of the outer
             ;; `and', so a failing "tar" makes the whole phase fail instead
             ;; of being silently ignored.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t)))))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
195
;; bedtools package.  No configure phase is run; the Makefile's SHELL
;; definition is rewritten after unpacking, and installation copies every
;; file found under "bin" into the output's "bin" directory.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0lnxrjvs3nnmb4bmskag1wg3h2hd80przz5q3xd0bvs7vyxrvpbl"))
       (patches (list (search-patch "bedtools-32bit-compilation.patch")))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (inputs `(("samtools" ,samtools)
              ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-makefile-SHELL-definition
           (lambda _
             ;; patch-makefile-SHELL cannot be used here as it does not
             ;; yet patch definitions with `:='.  Since changes to
             ;; patch-makefile-SHELL result in a full rebuild, features
             ;; of patch-makefile-SHELL are reimplemented here.
             (substitute* "Makefile"
               (("^SHELL := .*$")
                (string-append "SHELL := " (which "bash") " -e \n")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bindir
                                                     (basename program))))
                         (find-files "bin" ".*"))))))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
246
;; pybedtools package, built for Python 2 only.  bedtools and samtools are
;; propagated inputs.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    ;; The last sentence used to read "from with Python"; the missing "in"
    ;; has been restored.
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
279
;; BioPerl package with a small, fixed set of inputs.  The names of the
;; transitive target inputs of those inputs are computed on the host side and
;; spliced into the build code, where they are looked up in %build-inputs to
;; assemble the PERL5LIB value used to wrap the installed ".pl" programs.
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (concatenate
                 (map (compose package-transitive-target-inputs cadr)
                      perl-inputs))))))
    (package
      (name "bioperl-minimal")
      (version "1.6.924")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://cpan/authors/id/C/CJ/CJFIELDS/BioPerl-"
                             version ".tar.gz"))
         (sha256
          (base32
           "1l3npcvvvwjlhkna9dndpfv1hklhrgva013kw96m0n1wpd37ask1"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((output (assoc-ref outputs "out"))
                      (bindir (string-append output "/bin/"))
                      (perl5lib
                       (string-join
                        (cons (string-append output "/lib/perl5/site_perl")
                              (map (lambda (input-name)
                                     (assoc-ref %build-inputs input-name))
                                   ',transitive-input-names))
                        ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files bindir "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
340
;; Biopython package for the default Python.  A Python 2 variant is derived
;; from this definition with `package-with-python2'.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.65")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://biopython.org/DIST/biopython-"
                    version ".tar.gz"))
              (sha256
               (base32
                "13m8s9jkrw40zvdp1rl709n6lmgdh4f52aann7gzr6sfp0fwhg26"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     ;; Use the setuptools variant matching this package's interpreter; the
     ;; previous value was the Python 2 variant, which does not match the
     ;; `python-numpy' input above.
     `(("python-setuptools" ,python-setuptools)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
368
;; Python 2 variant of python-biopython; its inputs are replaced by the
;; Python 2 build of numpy.
(define-public python2-biopython
  (let ((base (package-with-python2 python-biopython)))
    (package
      (inherit base)
      (inputs
       `(("python2-numpy" ,python2-numpy))))))
373
;; NCBI BLAST+ package.  The source snippet removes bundled bzip2 and zlib;
;; the build enters the "c++" sub-directory, rewrites hard-coded tool paths,
;; and runs "./configure.orig" with explicit prefix, libdir and includedir
;; so that libraries and headers land in the separate "lib" and "include"
;; outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.2.31")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "19gq6as4k1jrgsd26158ads6h7v4jca3h4r5dzg1y0m6ya50x5ph"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2 and zlib
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;  19 MB
               "lib"                    ; 203 MB
               "include"))              ;  32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (arguments
     `(;; There are three(!) tests for this massive library, and all fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [util/regexp] test_pcre.sh (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "c++") #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the command CMD, or #f (after
             ;; printing a warning) when CMD cannot be found in PATH.
             (define (resolve-command cmd)
               (if (string=? cmd "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Rewrite hardcoded paths to various tools
             (substitute* (append '("src/build-system/configure.ac"
                                    "src/build-system/configure"
                                    "scripts/common/impl/if_diff.sh"
                                    "scripts/common/impl/run_with_lock.sh"
                                    "src/build-system/Makefile.configurables.real"
                                    "src/build-system/Makefile.in.top"
                                    "src/build-system/Makefile.meta.gmake=no"
                                    "src/build-system/Makefile.meta.in"
                                    "src/build-system/Makefile.meta_l"
                                    "src/build-system/Makefile.meta_p"
                                    "src/build-system/Makefile.meta_r"
                                    "src/build-system/Makefile.mk.in"
                                    "src/build-system/Makefile.requirements"
                                    "src/build-system/Makefile.rules_with_autodep.in")
                                  (find-files "scripts/common/check" "\\.sh$"))
               ;; Keep the original text when the tool cannot be resolved.
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole directory tool)
                (or (resolve-command tool) whole)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _ (setenv "HOME" "/tmp") #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (lib     (string-append (assoc-ref outputs "lib") "/lib"))
                    (include (string-append (assoc-ref outputs "include")
                                            "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root=" (getcwd) "/build")
                           (string-append "--prefix=" out)
                           (string-append "--libdir=" lib)
                           (string-append "--includedir=" include)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           ;; Each library is built twice by default, once
                           ;; with "-static" in its name, and again
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (apply system* "./configure.orig" flags))))))))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
520
;; Bowtie 2 package.  There is no configure phase; installation copies every
;; file matching "bowtie2.*" into the output's "bin" directory, and the test
;; suite is driven by the Perl script shipped under "scripts/test".
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "15dnbqippwvhyh9zqjhaxkabk7lm1xbh1nvar1x4b5kwm117zijn"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  (("^CC = .*$") "CC = gcc")
                  (("^CPP = .*$") "CPP = g++")
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))
              (patches (list (search-patch "bowtie-fix-makefile.patch")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)))
    (arguments
     '(#:make-flags '("allall")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (for-each (lambda (file)
                           (copy-file file (string-append bin file)))
                         (find-files "." "bowtie2.*"))
               #t)))
         (replace 'check
           (lambda _
             ;; `system*' returns the exit status as an integer, and every
             ;; integer (including a non-zero status) is true in Scheme.
             ;; Compare against zero so that failing tests fail the build.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
581
;; BWA package.  The configure phase is removed and installation is done by
;; hand: the binary, README.md and the man page are copied into the output.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                           version ".tar.bz2"))
       (sha256
        (base32
         "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out-dir (assoc-ref outputs "out"))
                    (bindir  (string-append out-dir "/bin"))
                    (docdir  (string-append out-dir "/share/doc/bwa"))
                    (mandir  (string-append out-dir "/share/man/man1")))
               (for-each mkdir-p (list bindir docdir mandir))
               (copy-file "bwa" (string-append bindir "/bwa"))
               (copy-file "README.md" (string-append docdir "/README.md"))
               (copy-file "bwa.1" (string-append mandir "/bwa.1"))))))))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
628
;; bx-python package, built with Python 2.  The source snippet strips the two
;; lines of setup.py that pull in the "distribute" bootstrap module.
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; remove dependency on outdated "distribute" module
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (arguments
     `(#:python ,python-2
       ;; tests fail because test data are not included
       #:tests? #f))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
663
;; CLIPper package, built with Python 2.  The source snippet removes the
;; "setup_requires" line from setup.py.
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/YeoLab/clipper/archive/"
                    version ".tar.gz"))
              ;; The archive URL ends in a bare "VERSION.tar.gz"; give the
              ;; downloaded file a descriptive name, as the other GitHub
              ;; archive sources in this file do.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
              (modules '((guix build utils)))
              (snippet
               ;; remove unnecessary setup dependency
               '(substitute* "setup.py"
                  (("setup_requires = .*") "")))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; only Python 2 is supported
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (native-inputs
     `(("python-mock" ,python2-mock)   ; for tests
       ("python-pytz" ,python2-pytz)   ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
701
;; COUGER package.  Nothing is configured or compiled: the "src" directory and
;; the "couger" launcher are copied into the output, the launcher is edited to
;; add the output directory to Python's module search path, and it is finally
;; wrapped so that it runs with the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out-dir (assoc-ref outputs "out"))
                    (bindir  (string-append out-dir "/bin")))
               (copy-recursively "src" (string-append out-dir "/src"))
               (mkdir bindir)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out-dir "\")\nfrom argparse")))
               (copy-file "couger" (string-append bindir "/couger")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((launcher    (string-append (assoc-ref outputs "out")
                                               "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
771
;; Clustal Omega package; built with the unmodified GNU build system, with
;; argtable as its only declared input.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
795
;; CrossMap package, built with Python 2.  The bundled pysam copy is deleted
;; from the source, and CROSSMAP_USE_SYSTEM_PYSAM is set in the environment
;; right after unpacking.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.1.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "163hi5gjgij6cndxlvbkp5jjwr0k4wbm9im6d2210278q7k9kpnp"))
       ;; patch has been sent upstream already
       (patches (list
                 (search-patch "crossmap-allow-system-pysam.patch")))
       (modules '((guix build utils)))
       ;; remove bundled copy of pysam
       (snippet
        '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1"))))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
837
;; Cutadapt package.  The standard check phase is removed and a new one is
;; run after installation, with the output's site-packages directory added
;; to PYTHONPATH.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     ;; tests must be run after install
     `(#:phases
       (modify-phases %standard-phases
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Derive "MAJOR.MINOR" from the text after the last dash of the
             ;; "python" input's file name.  This used to take a fixed number
             ;; of characters from the end of the name, which only works when
             ;; the version string is exactly five characters long.
             (let* ((python      (assoc-ref inputs "python"))
                    (version     (string-drop
                                  python
                                  (+ 1 (string-rindex python #\-))))
                    (components  (string-split version #\.))
                    (major+minor (string-append (car components) "."
                                                (cadr components))))
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (assoc-ref outputs "out")
                        "/lib/python" major+minor
                        "/site-packages"))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
877
;; deepTools: coverage-file generation and visualisation for deep-sequencing
;; data.  Built with Python 2; the scientific Python stack is propagated.
(define-public deeptools
  (package
    (name "deeptools")
    (version "1.5.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/archive/"
                           version ".tar.gz"))
       ;; The upstream archive is called just "VERSION.tar.gz"; store it
       ;; under a descriptive name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1kaagygcbvjs9sxd9cqmskd02wcfp9imvb735r087w7hwqpvz6fs"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)    ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
916
;; DIAMOND: BLAST-compatible protein aligner.  The build runs from the "src"
;; sub-directory, there is no configure script, and the resulting binary is
;; copied into place by hand.
(define-public diamond
  (package
    (name "diamond")
    (version "0.7.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hfkcfv9f76h5brbyw9fyvmc0l9cmbsxrcdqk0fa9xv82zj47p15"))
       ;; Drop the "bin/diamond" file shipped in the archive.
       (snippet '(begin
                   (delete-file "bin/diamond")
                   #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The install phase runs from "src", hence the "../bin"
             ;; path to the built executable.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable (string-append bin "/diamond")))
               (mkdir-p bin)
               (copy-file "../bin/diamond" executable)
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("zlib" ,zlib)))
    (native-inputs
     `(("bc" ,bc)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
966
;; Entrez Direct: a single Perl script, "edirect.pl", that is installed as-is
;; and then wrapped so that it finds its Perl dependencies at run time.
(define-public edirect
  (package
    (name "edirect")
    (version "2.50")
    (source
     (origin
       (method url-fetch)
       ;; Note: older versions are not retained.
       (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.zip")
       (sha256
        (base32
         "08afhz2ph66h8h381hl1mqyxkdi5nbvzsyj9gfw3jfbdijnpi4qj"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Nothing to configure or build.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin")))
               (mkdir-p target)
               (copy-file "edirect.pl"
                          (string-append target "/edirect.pl"))
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime:
             ;; prepend the build-time PERL5LIB to the one seen by the script.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl"))
                   (perl5lib (getenv "PERL5LIB")))
               (wrap-program script
                 (list "PERL5LIB" ":" 'prefix (list perl5lib)))))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
1034
;; eXpress: streaming quantification of sequencing data.  The CMake files are
;; patched to link Boost dynamically and to use the "bamtools" input instead
;; of the in-tree "bamtools" directory.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Top-level file: Boost linkage and bamtools headers.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; Source directory: bamtools library location.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
1079
;; FastTree: the source is a single C file, so there is nothing to unpack or
;; configure.  Two executables are compiled from it: "FastTree" and the
;; OpenMP-enabled "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.microbesonline.org/fasttree/FastTree-"
                           version ".c"))
       (sha256
        (base32
         "0dzqc9vr9iiiw21y159xfjl2z90vw0y7r4x6456pcaxiy5hd2wmi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into TARGET.  EXTRA-FLAGS are placed before
             ;; the optimisation flags common to both executables.
             (define (compile target . extra-flags)
               (zero? (apply system* "gcc"
                             (append extra-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o"
                                           target
                                           source
                                           "-lm")))))
             (and (compile "FastTree")
                  (compile "FastTreeMP" "-DOPENMP" "-fopenmp"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out")
                                       "/bin")))
               (mkdir-p bin)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bin "/" program)))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
1138
;; FASTX-Toolkit: FASTA/FASTQ preprocessing tools, built with the default
;; GNU build system phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
1170
;; Flexbar: barcode and adapter removal.  CMake is told to place the binary
;; directly in the output's "bin" directory, so the 'install phase is dropped
;; and the validation script is run against that binary in 'check.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (alist-replace
        'check
        (lambda* (#:key outputs #:allow-other-keys)
          ;; The validation script looks up "flexbar" on PATH.
          (setenv "PATH" (string-append
                          (assoc-ref outputs "out") "/bin:"
                          (getenv "PATH")))
          ;; The source directory name embeds the version; derive it from
          ;; the 'version' field so that an upgrade does not silently leave
          ;; a stale path here.
          (chdir (string-append "../flexbar_v" ,version "_src/test"))
          (zero? (system* "bash" "flexbar_validate.sh")))
        (alist-delete 'install %standard-phases))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
1215
;; GRIT: transcript model building from RNA-seq type assays.  Python 2 only;
;; the bundled C files are regenerated from their Cython sources.
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((numpy-headers
                    (string-append
                     (assoc-ref inputs "python-numpy")
                     "/lib/python2.7/site-packages/numpy/core/include/")))
               ;; Delete these C files to force fresh generation from pyx
               ;; sources.
               (for-each delete-file
                         '("grit/sparsify_support_fns.c"
                           "grit/call_peaks_support_fns.c"))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Add numpy include path to fix compilation
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-headers
                                 "']")))
               #t))))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
1266
;; HISAT: spliced aligner for RNA-seq reads.  The Makefile is patched to use
;; the toolchain compilers and fixed build metadata; there is no configure
;; script and no install target, so the executables are copied by hand.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (alist-cons-after
        'unpack 'patch-sources
        (lambda _
          ;; XXX Cannot use snippet because zip files are not supported
          (substitute* "Makefile"
            ;; 'substitute*' hands each line to the regexp together with
            ;; its trailing newline, so ".*$" swallows it.  Put the newline
            ;; back, otherwise these assignments are glued to the line that
            ;; follows them.
            (("^CC = .*$") "CC = gcc\n")
            (("^CPP = .*$") "CPP = g++\n")
            ;; replace BUILD_HOST and BUILD_TIME for deterministic build
            ;; NOTE(review): no newline is added back here on purpose; these
            ;; patterns presumably sit on backslash-continued lines, where
            ;; dropping the continuation together with the newline is
            ;; harmless -- confirm against the upstream Makefile.
            (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
            (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
          (substitute* '("hisat-build" "hisat-inspect")
            (("/usr/bin/env") (which "env")))
          #t)
        (alist-replace
         'install
         (lambda* (#:key outputs #:allow-other-keys)
           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
             (mkdir-p bin)
             ;; Copy every file whose name matches the hisat executable
             ;; pattern into BIN.
             (for-each
              (lambda (file)
                (copy-file file (string-append bin file)))
              (find-files
               "."
               "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
             #t))
         (alist-delete 'configure %standard-phases)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
1331
;; HMMER: profile hidden Markov model searches, built with the default GNU
;; build system phases.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       ;; The download directory is named after the first component of the
       ;; version.
       (uri (string-append "http://selab.janelia.org/software/hmmer"
                           (version-prefix version 1) "/"
                           version "/hmmer-" version ".tar.gz"))
       (sha256
        (base32
         "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.janelia.org")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft "file://easel/LICENSE"
                                 "See easel/LICENSE in the distribution.")))))
1360
;; HTSeq: Python infrastructure for high-throughput sequencing data.
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (inputs
     `(("python-numpy" ,python2-numpy)))
    ;; setuptools is only needed to build and install the package, so it is
    ;; listed as a native input like in the other Python packages here.
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
1384
;; HTSJDK: Java library for HTS file formats.  Built with Ant; the 'build
;; phase writes its results straight into the output directory, so the
;; 'configure, 'install and 'check phases are removed.
(define-public htsjdk
  (package
    (name "htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; remove build dependency on git
              (snippet '(begin
                          (substitute* "build.xml"
                            (("failifexecutionfails=\"true\"")
                             "failifexecutionfails=\"false\""))
                          ;; Return an explicit true value instead of
                          ;; whatever 'substitute*' happens to return.
                          #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((srfi srfi-1)         ;for 'fold'
                  (guix build gnu-build-system)
                  (guix build utils))
       #:phases (alist-replace
                 'build
                 (lambda _
                   (setenv "JAVA_HOME" (assoc-ref %build-inputs "jdk"))
                   ;; Point Ant's "dist" property at the output.
                   (zero? (system* "ant" "all"
                                   (string-append "-Ddist="
                                                  (assoc-ref %outputs "out")
                                                  "/share/java/htsjdk/"))))
                 (fold alist-delete %standard-phases
                       '(configure install check)))))
    (native-inputs
     `(("ant" ,ant)
       ("jdk" ,icedtea6 "jdk")))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
1429
;; HTSlib: C library for high-throughput sequencing data.  The test driver
;; refers to /bin/bash, which is replaced with the bash found on PATH.
(define-public htslib
  (package
    (name "htslib")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
1464
;; IDR: irreproducible discovery rate.  The "idr" executable is wrapped so
;; that it finds scipy, numpy and matplotlib at run time.
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/nboley/idr/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after
          'install 'wrap-program
          (lambda* (#:key inputs outputs #:allow-other-keys)
            ;; Derive "MAJOR.MINOR" from the version suffix of the "python"
            ;; input (everything after the last dash of its file name).
            ;; Splitting on dots instead of taking a fixed number of
            ;; characters keeps this correct for versions such as "2.7.10"
            ;; or "3.10.1".
            (let* ((out (assoc-ref outputs "out"))
                   (python (assoc-ref inputs "python"))
                   (full-version
                    (substring python
                               (+ 1 (string-rindex python #\-))))
                   (components (string-split full-version #\.))
                   (python-version (string-append (car components) "."
                                                  (cadr components)))
                   ;; Colon-separated site-packages directories of the
                   ;; run-time dependencies.
                   (path (string-join
                          (map (lambda (name)
                                 (string-append (assoc-ref inputs name)
                                                "/lib/python" python-version
                                                "/site-packages"))
                               '("python-scipy"
                                 "python-numpy"
                                 "python-matplotlib"))
                          ":")))
              (wrap-program (string-append out "/bin/idr")
                `("PYTHONPATH" ":" prefix (,path))))
            #t)))))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl3+)))
1514
;; MACS: model-based analysis of ChIP-Seq data, fetched from PyPI under its
;; upstream name "MACS2".
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20140616")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/M/MACS2/MACS2-"
             version ".tar.gz"))
       (sha256
        (base32
         "11lmiw6avqhwn75sn59g4lfkrr2kk20r3rgfbx9xfqb8rg9mi2n6"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only compatible with Python 2.7
       #:tests? #f))                    ; no test target
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
1545
1546
;; MetaBAT: metagenome binning.  Built with SCons; a single "scons install"
;; invocation in the 'build phase does all the work, so the 'configure,
;; 'check and 'install phases are removed.
(define-public metabat
  (package
    (name "metabat")
    (version "0.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/"
                           version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32
         "0vgrhbaxg4dkxyax2kbigak7w0arhqvw0szwp6gd9wmyilc44kfa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Refer to the samtools headers as "samtools/..." rather than
         ;; "bam/...".
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "SConstruct"
               (("/include/bam/bam.h")
                "/include/samtools/bam.h"))
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"samtools/kseq.h\""))
             #t))
         (add-after 'unpack 'fix-scons
           (lambda _
             (substitute* "SConstruct"  ; Do not distribute README
               (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)")
                ""))
             #t))
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   ;; Build a "KEY=<directory of INPUT>" scons argument.
                   (setting (lambda (key input)
                              (string-append key "="
                                             (assoc-ref inputs input)))))
               (mkdir out)
               (zero? (system* "scons"
                               (string-append "PREFIX=" out)
                               (setting "HTSLIB_DIR" "htslib")
                               (setting "SAMTOOLS_DIR" "samtools")
                               (setting "BOOST_ROOT" "boost")
                               "install")))))
         ;; check and install carried out during build phase
         (delete 'check)
         (delete 'install))))
    (native-inputs
     `(("scons" ,scons)))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
1624
;; MISO: isoform quantitation for RNA-Seq, fetched from PyPI under its
;; upstream name "misopy".  The setup script is patched in the source
;; snippet.
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Use setuptools, or else the executables are not
           ;; installed.
           (("distutils.core") "setuptools")
           ;; use "gcc" instead of "cc" for compilation
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)    ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
1674
;; OrfM: open reading frame caller, built with the default GNU build system
;; phases.
(define-public orfm
  (package
    (name "orfm")
    (version "0.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "05fmw145snk646ly076zby0fjav0k7ysbclck5d4s9pmgcfpijc2"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that when translated are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
1695
;; pbcore: Python 2 library for PacBio data files.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "0.9.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/PacificBiosciences/pbcore/archive/"
             version ".tar.gz"))
       ;; The upstream archive is called just "VERSION.tar.gz".
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1z46rwjac93jm87cbj2zgjg6qvsgs65140wkbbxsvxps7ai4pm09"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; pbcore requires Python 2.7
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
1724
;; WarpedLMM: warped linear mixed models for Python 2.  The "bin" directory
;; created by the installation is removed again after 'install.
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
             version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'remove-bin-directory
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The "bin" directory only contains wrappers for running
             ;; the module tests. They are not needed after the
             ;; "check" phase.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (delete-file-recursively bin)
               #t))))))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)
       ("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
1771
(define-public pbtranscript-tofu
  ;; Built from a pinned commit of the cDNA_primer repository; the commit ID
  ;; is appended to the version string.
  (let ((commit "c7bbd5472"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "0.4.1." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/PacificBiosciences/cDNA_primer.git")
                      (commit commit)))
                ;; 'git-fetch' produces a directory, not a tarball, so do not
                ;; give the store item a misleading ".tar.gz" suffix.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "148xkzi689c49g6fdhckp6mnmj2qhjdf1j4wifm6ja7ij95d7fxx"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; With standard flags, the install phase attempts to create a zip'd
         ;; egg file, and fails with an error: 'ZIP does not support timestamps
         ;; before 1980'
         #:configure-flags '("--single-version-externally-managed"
                             "--record=pbtranscript-tofu.txt")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory-and-clean-up
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               ;; Delete clutter: bundled build output and third-party
               ;; archives shipped in the repository.
               (for-each delete-file-recursively
                         '("dist/"
                           "build/"
                           "setuptools_cython-0.2.1-py2.6.egg/"
                           "pbtools.pbtranscript.egg-info"))
               (for-each delete-file
                         '("Cython-0.20.1.tar.gz"
                           "setuptools_cython-0.2.1-py2.7.egg"
                           "setuptools_cython-0.2.1.tar.gz"
                           "setup.cfg"))
               (for-each delete-file
                         (find-files "." "\\.so$"))
               ;; Files should be writable for the install phase.
               (for-each (lambda (f) (chmod f #o755))
                         (find-files "." "\\.py$"))
               ;; Report success explicitly rather than relying on the
               ;; unspecified value returned by 'for-each'.
               #t)))))
      (inputs
       `(("python-cython" ,python2-cython)
         ("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)))
      (native-inputs
       `(("python-nose" ,python2-nose)
         ("python-setuptools" ,python2-setuptools)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
1830
(define-public prodigal
  ;; Plain Makefile project: no configure script and no test target.
  (package
    (name "prodigal")
    (version "2.6.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/hyattpd/Prodigal/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0m8sb0fg6lmxrlpzna0am6svbnlmd3dckrhgzxxgb3gxr5fyj284"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))
       ;; The Makefile installs into INSTALLDIR; point it at "bin" in the
       ;; package output.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))))
    (home-page "http://prodigal.ornl.gov")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
1861
(define-public rsem
  ;; Built against the packaged boost and samtools 0.1 instead of the copies
  ;; bundled with the source.
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                       version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (list (search-patch "rsem-makefile.patch")))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled boost sources.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script; use this slot to stop the
         ;; Makefile's "all" target from building the bundled samtools
         ;; library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         ;; Copy every "rsem-*" file into bin/ and the Perl helper module
         ;; into the site_perl directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (bin-dir  (string-append out "/bin/"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin-dir)
               (mkdir-p perl-dir)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bin-dir
                                                     (basename program))))
                         (find-files "." "rsem-.*"))
               (copy-file "rsem_perl_utils.pm"
                          (string-append perl-dir "/rsem_perl_utils.pm")))
             #t))
         ;; Wrap the listed programs so PERL5LIB includes the directory
         ;; where "rsem_perl_utils.pm" was installed.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             (list "PERL5LIB" ":" 'prefix (list perl-dir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r" ,r)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
1939
(define-public rseqc
  ;; Built for Python 2 against the packaged pysam rather than the bundled
  ;; copy.
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/rseqc/"
                       version "/RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; remove dependency on outdated "distribute" module
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; do not use bundled copy of pysam
             (("^have_pysam = False") "have_pysam = True"))
           ;; Signal success explicitly instead of relying on the
           ;; unspecified value returned by 'substitute*'.
           #t))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
1982
(define-public samtools
  ;; Besides the programs installed by the Makefile, this package also
  ;; installs the static "libbam.a" library and the top-level C headers.
  (package
    (name "samtools")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1akdqb685pk9xk1nb6sa9aq8xssjjhvvc06kp4cpdqvz2157l3j2"))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are 87 test failures when building on non-64-bit architectures
       ;; due to invalid test data.  This has since been fixed upstream (see
       ;; <https://github.com/samtools/samtools/pull/307>), but as there has
       ;; not been a new release we disable the tests for all non-64-bit
       ;; systems.
       ;;
       ;; The target system must be consulted first: '%current-system' is
       ;; never #f, so putting it first in the 'or' would make the target
       ;; system unreachable.
       #:tests? ,(string=? (or (%current-target-system) (%current-system))
                           "x86_64-linux")
       #:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:make-flags (list "LIBCURSES=-lncurses"
                          (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         (add-after 'unpack 'patch-tests
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bash (assoc-ref inputs "bash")))
               (substitute* "test/test.pl"
                 ;; The test script calls out to /bin/bash
                 (("/bin/bash")
                  (string-append bash "/bin/bash"))
                 ;; There are two failing tests upstream relating to the "stats"
                 ;; subcommand in test_usage_subcommand ("did not have Usage"
                 ;; and "usage did not mention samtools stats"), so we disable
                 ;; them.
                 (("(test_usage_subcommand\\(.*\\);)" cmd)
                  (string-append "unless ($subcommand eq 'stats') {" cmd "};")))
               #t)))
         ;; Copy every "*.h" file of the top-level source directory to
         ;; include/samtools/.
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((include (string-append (assoc-ref outputs "out")
                                           "/include/samtools/")))
               (mkdir-p include)
               (for-each (lambda (file)
                           (copy-file file (string-append include
                                                          (basename file))))
                         (scandir "." (lambda (name)
                                        (string-match "\\.h$" name))))
               #t)))
         ;; Added last, so this phase runs directly after 'install and
         ;; before 'install-headers.
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
               (mkdir-p lib)
               (copy-file "libbam.a" (string-append lib "/libbam.a"))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
2058
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output formats
  ;; differ greatly from those of samtools 1.x, and many bioinformatics
  ;; pipelines still depend on it.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     (substitute-keyword-arguments (package-arguments samtools)
       ((#:tests? tests) #f)            ;no "check" target
       ((#:phases phases)
        `(modify-phases ,phases
           ;; The inherited test-patching phase is not used here.
           (delete 'patch-tests)
           ;; Install the "samtools" executable by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (mkdir-p bin)
                 (copy-file "samtools"
                            (string-append bin "/samtools")))))))))))
2086
(define-public ngs-sdk
  ;; The source archive holds several sub-projects; this package builds the
  ;; one in the "ngs-sdk" directory.
  (package
    (name "ngs-sdk")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1x58gpm574n0xmk2a98gmikbgycq78ia0bvnb42k5ck34fmd5v8y"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         ;; Invoke "./configure" by hand with just the options it knows:
         ;; the script doesn't recognize things like
         ;; '--enable-fast-install'.
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out          (assoc-ref outputs "out"))
                    (build-prefix (string-append "--build-prefix="
                                                 (getcwd) "/build"))
                    (prefix       (string-append "--prefix=" out)))
               (zero? (system* "./configure" build-prefix prefix))))))))
    (native-inputs `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
2128
(define-public ngs-java
  ;; Same source as ngs-sdk, built from the "ngs-java" sub-directory.
  (package (inherit ngs-sdk)
    (name "ngs-java")
    (arguments
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           ;; Enter "ngs-java" instead of the directory used by ngs-sdk.
           (replace 'enter-dir
             (lambda _ (chdir "ngs-java") #t))
           (add-after 'enter-dir 'fix-java-symlink-installation
             (lambda _
               ;; Only replace the version suffix, not the version number in
               ;; the directory name.  Reported here:
               ;; https://github.com/ncbi/ngs/pull/4
               ;;
               ;; Each "$(subst VERSION..., X, Y)" call in the Makefile is
               ;; rewritten as "$(patsubst %VERSION..., %X, Y)".
               (substitute* "Makefile.java"
                 (((string-append "\\$\\(subst "
                                  "(\\$\\(VERSION[^\\)]*\\)),"
                                  "(\\$\\([^\\)]+\\)),"
                                  "(\\$\\([^\\)]+\\)|\\$\\@)"
                                  "\\)")
                   _ pattern replacement target)
                  (string-append "$(patsubst "
                                 "%" pattern ","
                                 "%" replacement ","
                                 target ")"))))))))
    (inputs
     `(("jdk" ,icedtea6 "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
2164
(define-public ncbi-vdb
  ;; In addition to what "make install" provides, this package installs the
  ;; interface libraries ("ilib") and the interface headers.
  (package
    (name "ncbi-vdb")
    (version "2.4.5-5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1cj8nk6if8sqagv20vx36v566fdvhcaadf0x1ycnbgql6chbs6vy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir ilib)
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               ;; Report success explicitly.
               #t)))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Only replace the version suffix, not the version number in
               ;; the directory name; fixed in commit 4dbba5c6a809 (no
               ;; release yet).
               (substitute* "setup/konfigure.perl"
                 (((string-append "\\$\\(subst "
                                  "(\\$\\(VERSION[^\\)]*\\)),"
                                  "(\\$\\([^\\)]+\\)),"
                                  "(\\$\\([^\\)]+\\)|\\$\\@)"
                                  "\\)")
                   _ pattern replacement target)
                  (string-append "$(patsubst "
                                 "%" pattern ","
                                 "%" replacement ","
                                 target ")")))

               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-ngs-java-prefix="
                                      (assoc-ref inputs "ngs-java"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("ngs-java" ,ngs-java)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
2264
(define-public plink
  ;; Makefile-only project: no configure script, no test target, and the
  ;; single "plink" binary is installed by hand.
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (list (search-patch "plink-1.07-unclobber-i.patch")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (mkdir-p bin)
               (copy-file "plink" (string-append bin "plink"))
               #t))))
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
2317
(define-public preseq
  ;; Built against the static libbam and headers of samtools 0.1 instead of
  ;; the bundled samtools copy.
  (package
    (name "preseq")
    (version "1.0.2")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "http://smithlabresearch.org/downloads/preseq-"
                              version ".tar.bz2"))
              (sha256
               (base32 "0r7sw07p6nv8ygvc17gd78lisbw5336v3vhs86b5wv8mw3pwqksc"))
              (patches (list (search-patch "preseq-1.0.2-install-to-PREFIX.patch")
                             (search-patch "preseq-1.0.2-link-with-libbam.patch")))
              (modules '((guix build utils)))
              (snippet
               ;; Remove bundled samtools.
               '(delete-file-recursively "preseq-master/samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((out      (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")))
       #:phases
       (modify-phases %standard-phases
         ;; The sources live in the "preseq-master" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "preseq-master")
             #t))
         ;; Refer to "sam.h" through the "samtools/" include
         ;; sub-directory.
         (add-after 'enter-dir 'use-samtools-headers
           (lambda _
             (substitute* '("smithlab_cpp/SAM.cpp"
                            "smithlab_cpp/SAM.hpp")
               (("sam.h") "samtools/sam.h"))
             #t))
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
2373
(define-public sra-tools
  ;; Built against the installed ncbi-vdb package rather than an ncbi-vdb
  ;; build tree.
  (package
    (name "sra-tools")
    (version "2.4.5-5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "11nrnvz7a012f4iryf0wiwrid0h111grsfxbxa9j51h3f2xbvgns"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out      (assoc-ref outputs "out"))
                   (ncbi-vdb (assoc-ref inputs "ncbi-vdb")))
               ;; The build system expects a directory containing the
               ;; sources and raw build output of ncbi-vdb, including files
               ;; that are not installed.  Since we are building against an
               ;; installed version of ncbi-vdb, the following modifications
               ;; are needed.
               (substitute* "setup/konfigure.perl"
                 ;; Make the configure script look for the "ilib" directory
                 ;; of "ncbi-vdb" without first checking for the existence
                 ;; of a matching library in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Look for interface libraries in ncbi-vdb's "ilib"
                 ;; directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" ncbi-vdb)
                       (string-append "--with-ncbi-vdb-build=" ncbi-vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
2448
(define-public seqan
  ;; Nothing is compiled: the builder unpacks the archive and copies
  ;; "include" and "share" into the "out" and "doc" outputs respectively.
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball  (assoc-ref %build-inputs "source"))
                (tar-dir  (assoc-ref %build-inputs "tar"))
                (bzip-dir (assoc-ref %build-inputs "bzip2"))
                (out-dir  (assoc-ref %outputs "out"))
                (doc-dir  (assoc-ref %outputs "doc")))
           ;; tar and bzip2 are the only programs placed on PATH.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip-dir "/bin"))
           (system* "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" (string-append out-dir "/include"))
           (copy-recursively "share" (string-append doc-dir "/share"))))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
2491
(define-public star
  ;; Built from the "source" sub-directory with the "STAR" make target; the
  ;; resulting binary is installed by hand.
  (package
    (name "star")
    (version "2.4.2a")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/alexdobin/STAR/archive/STAR_"
                    version ".tar.gz"))
              (sha256
               (base32
                "1c3rnm7r5l0kl3d04gl1g7938xqf1c2l0mla87rlplqg1hcns5mc"))
              (modules '((guix build utils)))
              (snippet
               ;; Use whichever "rm" is on PATH instead of "/bin/rm".
               '(substitute* "source/Makefile"
                  (("/bin/rm") "rm")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (copy-file "STAR" (string-append bin "STAR"))
               ;; Report success explicitly; 'copy-file' returns an
               ;; unspecified value.
               #t)))
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "source")
             ;; Likewise, 'chdir' returns an unspecified value.
             #t)))))
    (native-inputs
     `(("vim" ,vim)))                   ; for xxd
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
2539
(define-public subread
  ;; Built inside "src" using "Makefile.Linux"; the contents of the
  ;; top-level "bin" directory are then copied into the output.
  (package
    (name "subread")
    (version "1.4.6-p2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/subread/subread-"
                    version "-source.tar.gz"))
              (sha256
               (base32
                "06sv9mpcsdj6p68y15d6gi70lca3lxmzk0dn61hg0kfsa7rxmsr3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags '("-f" "Makefile.Linux")
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (mkdir-p bin)
               ;; "../bin" is relative to "src", the directory entered
               ;; after unpacking.
               (copy-recursively "../bin" bin))))
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
2578
(define-public vcftools
  ;; Makefile-only project; PREFIX and MANDIR are passed on the "make"
  ;; command line.
  (package
    (name "vcftools")
    (version "0.1.12b")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/vcftools/vcftools_"
                    version ".tar.gz"))
              (sha256
               (base32
                "148al9h7f8g8my2qdnpax51kdd2yjrivlx6frvakf4lz5r8j88wx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Make the Makefile copy the manual page from "./cpp" rather
         ;; than from "${PREFIX}/cpp".
         (add-after 'unpack 'patch-manpage-install
           (lambda _
             (substitute* "Makefile"
               (("cp \\$\\{PREFIX\\}/cpp/vcftools.1")
                "cp ./cpp/vcftools.1")))))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://vcftools.sourceforge.net/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at http://vcftools.sourceforge.net/license.html
    (license license:lgpl3)))