gnu: Add zita-resampler.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
CommitLineData
4e10a221 1;;; GNU Guix --- Functional package management for GNU
241e1221 2;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net>
684bf7c7 3;;; Copyright © 2015 Ben Woodcroft <donttrustben@gmail.com>
4e10a221
RW
4;;;
5;;; This file is part of GNU Guix.
6;;;
7;;; GNU Guix is free software; you can redistribute it and/or modify it
8;;; under the terms of the GNU General Public License as published by
9;;; the Free Software Foundation; either version 3 of the License, or (at
10;;; your option) any later version.
11;;;
12;;; GNU Guix is distributed in the hope that it will be useful, but
13;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15;;; GNU General Public License for more details.
16;;;
17;;; You should have received a copy of the GNU General Public License
18;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19
20(define-module (gnu packages bioinformatics)
21 #:use-module ((guix licenses) #:prefix license:)
22 #:use-module (guix packages)
8e913213 23 #:use-module (guix utils)
4e10a221 24 #:use-module (guix download)
2c16316e 25 #:use-module (guix git-download)
4e10a221 26 #:use-module (guix build-system gnu)
d7678942 27 #:use-module (guix build-system cmake)
365c8153 28 #:use-module (guix build-system perl)
8622a072 29 #:use-module (guix build-system python)
d3517eda 30 #:use-module (guix build-system trivial)
4e10a221 31 #:use-module (gnu packages)
684bf7c7 32 #:use-module (gnu packages algebra)
d3517eda 33 #:use-module (gnu packages base)
e4e5a4d8 34 #:use-module (gnu packages boost)
4e10a221 35 #:use-module (gnu packages compression)
75dd2424 36 #:use-module (gnu packages file)
15a3c3d4 37 #:use-module (gnu packages java)
51c64999 38 #:use-module (gnu packages linux)
36742f43 39 #:use-module (gnu packages machine-learning)
c833ab55 40 #:use-module (gnu packages maths)
4e10a221
RW
41 #:use-module (gnu packages ncurses)
42 #:use-module (gnu packages perl)
43 #:use-module (gnu packages pkg-config)
bfe3c685 44 #:use-module (gnu packages popt)
e4e5a4d8 45 #:use-module (gnu packages protobuf)
346a829a 46 #:use-module (gnu packages python)
c833ab55
RW
47 #:use-module (gnu packages statistics)
48 #:use-module (gnu packages swig)
d7678942 49 #:use-module (gnu packages tbb)
2127cedb 50 #:use-module (gnu packages textutils)
ce7155d5 51 #:use-module (gnu packages vim)
365c8153 52 #:use-module (gnu packages web)
c833ab55 53 #:use-module (gnu packages xml)
346a829a 54 #:use-module (gnu packages zip))
4e10a221 55
9794180d
RW
;; BamTools: C++ library and command-line tools for BAM files.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Export LDFLAGS before 'configure' so that the linker records
         ;; OUT/lib/bamtools as a runtime search path.
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib/bamtools")))
               (setenv "LDFLAGS"
                       (string-append "-Wl,-rpath=" libdir))))))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
88
8dd4ff11
RW
;; BEDOPS: set operations and statistics on genomic feature files.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (alist-cons-after
        'unpack 'unpack-tarballs
        (lambda _
          ;; FIXME: Bedops includes tarballs of minimally patched upstream
          ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
          ;; libraries because at least one of the libraries (zlib) is
          ;; patched to add a C++ function definition (deflateInit2cpp).
          ;; Until the Bedops developers offer a way to link against system
          ;; libraries we have to build the in-tree copies of these three
          ;; libraries.

          ;; See upstream discussion:
          ;; https://github.com/bedops/bedops/issues/124

          ;; The phase result is the conjunction of the three "tar" exit
          ;; checks, so a failed extraction makes the phase return #f
          ;; instead of being discarded by the expressions that follow.
          (and
           ;; Unpack the tarballs to benefit from shebang patching.
           (with-directory-excursion "third-party"
             (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                  (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                  (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
           (begin
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))
        ;; There is no "configure" phase to run.
        (alist-delete 'configure %standard-phases))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
146
81de5647
RW
;; bedtools: genome arithmetic on interval files.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.22.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "16aq0w3dmbd0853j32xk9jin4vb6v6fgakfyvrsmsjizzbn3fpfl"))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (inputs `(("samtools" ,samtools)
              ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy every file found under "bin" into OUT/bin.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target-dir (string-append (assoc-ref outputs "out")
                                              "/bin/")))
               (mkdir-p target-dir)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append target-dir
                                                     (basename program))))
                         (find-files "bin" ".*")))))
         (delete 'configure)
         (add-after 'unpack 'patch-makefile-SHELL-definition
           (lambda _
             ;; patch-makefile-SHELL cannot be used here as it does not
             ;; yet patch definitions with `:='.  Since changes to
             ;; patch-makefile-SHELL result in a full rebuild, features
             ;; of patch-makefile-SHELL are reimplemented here.
             (substitute* "Makefile"
               (("^SHELL := .*$")
                (string-append "SHELL := " (which "bash") " -e \n"))))))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
196
a2fb1492
RW
;; pybedtools: Python 2 bindings around the bedtools programs.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    ;; bedtools and samtools are propagated so that they are installed
    ;; alongside this package.
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
229
85c37e29
RW
;; Biopython, built with the default Python of python-build-system.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.65")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://biopython.org/DIST/biopython-"
                    version ".tar.gz"))
              (sha256
               (base32
                "13m8s9jkrw40zvdp1rl709n6lmgdh4f52aann7gzr6sfp0fwhg26"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python-numpy)))
    ;; Use the setuptools variant that matches the interpreter this package
    ;; is built with; no #:python argument is given here, so the Python 2
    ;; variant does not belong in this definition.
    (native-inputs
     `(("python-setuptools" ,python-setuptools)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
257
;; Python 2 variant of python-biopython; only the numpy input is overridden.
(define-public python2-biopython
  (let ((base (package-with-python2 python-biopython)))
    (package
      (inherit base)
      (inputs
       `(("python2-numpy" ,python2-numpy))))))
262
2c7ee167
RW
;; Bowtie 2: short read aligner.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "15dnbqippwvhyh9zqjhaxkabk7lm1xbh1nvar1x4b5kwm117zijn"))
              (modules '((guix build utils)))
              (snippet
               ;; NOTE(review): the CC/CPP patterns end in `.*$' while their
               ;; replacements carry no trailing newline -- confirm that the
               ;; rewritten Makefile lines are not joined with the next line.
               '(substitute* "Makefile"
                  (("^CC = .*$") "CC = gcc")
                  (("^CPP = .*$") "CPP = g++")
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))
              (patches (list (search-patch "bowtie-fix-makefile.patch")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)))
    (arguments
     '(#:make-flags '("allall")
       #:phases
       (alist-delete
        'configure
        (alist-replace
         'install
         ;; Copy every file whose name matches "bowtie2.*" into OUT/bin.
         (lambda* (#:key outputs #:allow-other-keys)
           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
             (mkdir-p bin)
             (for-each (lambda (file)
                         (copy-file file (string-append bin file)))
                       (find-files "." "bowtie2.*"))))
         (alist-replace
          'check
          (lambda* (#:key outputs #:allow-other-keys)
            ;; `system*' returns the exit status, and any integer is a true
            ;; value in Scheme; compare against zero so that a failing test
            ;; run makes the phase return #f.
            (zero? (system* "perl"
                            "scripts/test/simple_tests.pl"
                            "--bowtie2=./bowtie2"
                            "--bowtie2-build=./bowtie2-build")))
          %standard-phases)))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
323
9a8336d8
RW
;; BWA: Burrows-Wheeler aligner.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the binary, the README and the manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin-dir (string-append out "/bin"))
                    (doc-dir (string-append out "/share/doc/bwa"))
                    (man-dir (string-append out "/share/man/man1")))
               (mkdir-p bin-dir)
               (mkdir-p doc-dir)
               (mkdir-p man-dir)
               (copy-file "bwa" (string-append bin-dir "/bwa"))
               (copy-file "README.md" (string-append doc-dir "/README.md"))
               (copy-file "bwa.1" (string-append man-dir "/bwa.1"))))))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
370
ad641d53
RW
;; bx-python: tools for biological data, built with Python 2.
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; remove dependency on outdated "distribute" module
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:tests? #f))    ;tests fail because test data are not included
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
405
810cff85
RW
;; CLIPper: peak caller for CLIP-seq data.
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (sha256
        (base32
         "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
       (modules '((guix build utils)))
       ;; remove unnecessary setup dependency
       (snippet
        '(substitute* "setup.py"
           (("setup_requires = .*") "")))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)    ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
443
36742f43
RW
;; COUGER: co-factor identification for paralogous transcription factors.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://couger.oit.duke.edu/static/assets/COUGER"
                    version ".zip"))
              (sha256
               (base32
                "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Nothing to configure or build; the files are installed as they
         ;; are.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix   (assoc-ref outputs "out"))
                    (launcher (string-append prefix "/bin/couger")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir (string-append prefix "/bin"))
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (copy-file "couger" launcher))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let* ((prefix      (assoc-ref outputs "out"))
                    (python-path (getenv "PYTHONPATH")))
               (wrap-program (string-append prefix "/bin/couger")
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
513
bfe3c685
RW
;; Clustal Omega: multiple sequence alignment.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    ;; argtable is the only input declared for this package.
    (inputs `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
537
191c7101
RW
;; CrossMap: coordinate conversion between genome assemblies.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.1.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "163hi5gjgij6cndxlvbkp5jjwr0k4wbm9im6d2210278q7k9kpnp"))
       ;; patch has been sent upstream already
       (patches (list
                 (search-patch "crossmap-allow-system-pysam.patch")))
       (modules '((guix build utils)))
       ;; remove bundled copy of pysam
       (snippet
        '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set the variable right after unpacking, before the later phases
         ;; run.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1"))))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
579
8e913213
RW
;; Cutadapt: adapter trimming for sequencing reads.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     ;; tests must be run after install
     `(#:phases
       (modify-phases %standard-phases
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The version component of the site-packages directory is cut
             ;; out of the tail of the "python" input's file name.
             (let* ((python-dir (assoc-ref inputs "python"))
                    (py-version (string-take
                                 (string-take-right python-dir 5) 3))
                    (site-dir   (string-append (assoc-ref outputs "out")
                                               "/lib/python" py-version
                                               "/site-packages")))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH") ":" site-dir))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
619
684bf7c7
BW
;; DIAMOND: BLAST-compatible protein aligner.
(define-public diamond
  (package
    (name "diamond")
    (version "0.7.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hfkcfv9f76h5brbyw9fyvmc0l9cmbsxrcdqk0fa9xv82zj47p15"))
       ;; Delete the "bin/diamond" file that ships with the source archive.
       (snippet
        '(begin
           (delete-file "bin/diamond")
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The later phases run from inside "src".
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin-dir (string-append out "/bin")))
               (mkdir-p bin-dir)
               (copy-file "../bin/diamond"
                          (string-append bin-dir "/diamond"))
               #t))))))
    (native-inputs
     `(("bc" ,bc)))
    (inputs
     `(("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
669
365c8153
RW
;; Entrez Direct: command-line access to the NCBI databases.
(define-public edirect
  (package
    (name "edirect")
    (version "2.50")
    (source
     (origin
       (method url-fetch)
       ;; Note: older versions are not retained.
       (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.zip")
       (sha256
        (base32
         "08afhz2ph66h8h381hl1mqyxkdi5nbvzsyj9gfw3jfbdijnpi4qj"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Only the "edirect.pl" script is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin-dir (string-append out "/bin")))
               (mkdir-p bin-dir)
               (copy-file "edirect.pl"
                          (string-append bin-dir "/edirect.pl"))
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((script   (string-append (assoc-ref outputs "out")
                                             "/bin/edirect.pl"))
                    (perl-lib (getenv "PERL5LIB")))
               (wrap-program script
                 `("PERL5LIB" ":" prefix (,perl-lib)))))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
737
e4e5a4d8
RW
;; eXpress: streaming quantification of sequence abundances.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Turn off static Boost linking and replace the in-tree bamtools
         ;; paths with those of the "bamtools" input.
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools-dir (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools-dir "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools-dir "/lib/bamtools"))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
782
2127cedb
RW
;; FASTX-Toolkit: preprocessing tools for FASTA/FASTQ files.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
814
d7678942
RW
;; Flexbar: barcode and adapter removal for sequencing reads.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (alist-replace
        'check
        (lambda* (#:key outputs #:allow-other-keys)
          ;; Put OUT/bin first in PATH before running the validation script.
          (setenv "PATH" (string-append
                          (assoc-ref outputs "out") "/bin:"
                          (getenv "PATH")))
          ;; Build the source directory name from the package version, as
          ;; the source URI does, so that a version bump does not leave a
          ;; stale path behind.
          (chdir (string-append "../flexbar_v" ,version "_src/test"))
          (zero? (system* "bash" "flexbar_validate.sh")))
        (alist-delete 'install %standard-phases))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
859
5854f685
RW
;; GRIT: Python 2 tool for integrative analysis of RNA-seq type assays.
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/nboley/grit/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((numpy-headers
                    (string-append
                     (assoc-ref inputs "python-numpy")
                     "/lib/python2.7/site-packages/numpy/core/include/")))
               ;; Removing the shipped C files forces them to be
               ;; regenerated from the pyx sources.
               (for-each delete-file
                         '("grit/sparsify_support_fns.c"
                           "grit/call_peaks_support_fns.c"))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Compilation needs the numpy include path.
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-headers
                                 "']")))
               #t))))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
910
346a829a
RW
;; HISAT: spliced aligner for RNA-seq reads, built from a plain Makefile.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; `popcnt' instructions are unsupported on
                      ;; architectures other than x86_64, so turn them off
                      ;; everywhere else.
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p bindir)
               ;; Copy every file whose name matches the hisat executable
               ;; pattern into the output's bin directory.
               (for-each
                (lambda (program)
                  (copy-file program (string-append bindir program)))
                (find-files
                 "."
                 "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))))
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX A snippet cannot be used here because zip files are
             ;; not supported.
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; Fixed BUILD_HOST and BUILD_TIME values keep the build
               ;; deterministic.
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env"))))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
975
85652f59
RW
;; HTSeq: Python infrastructure for high-throughput sequencing data.
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
999
15a3c3d4
RW
;; HTSJDK: Java library for high-throughput sequencing file formats, built
;; with Ant instead of the usual configure/make sequence.
(define-public htsjdk
  (package
    (name "htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; Remove the build dependency on git: let the build carry on
              ;; when the external command cannot be executed.
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The configure, install and check phases are not used; the Ant
         ;; invocation below is given the output directory directly via
         ;; "-Ddist=".
         (delete 'configure)
         (delete 'install)
         (delete 'check)
         (replace 'build
           (lambda _
             (setenv "JAVA_HOME" (assoc-ref %build-inputs "jdk"))
             (zero? (system* "ant" "all"
                             (string-append "-Ddist="
                                            (assoc-ref %outputs "out")
                                            "/share/java/htsjdk/"))))))))
    (native-inputs
     `(("ant" ,ant)
       ("jdk" ,icedtea6 "jdk")))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
1044
e7c09730
RW
;; HTSlib: C library for high-throughput sequencing data.
(define-public htslib
  (package
    (name "htslib")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test driver refers to /bin/bash; use the bash found
             ;; on PATH instead.
             (let ((bash-program (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash-program)))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; The Expat license covers everything except the files under cram/,
    ;; which are released under the modified BSD license.
    (license (list license:expat license:bsd-3))))
1079
d57e6d0f
RW
;; MACS: model-based analysis of ChIP-Seq data (MACS2 from PyPI).
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20140616")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/M/MACS2/MACS2-"
             version ".tar.gz"))
       (sha256
        (base32
         "11lmiw6avqhwn75sn59g4lfkrr2kk20r3rgfbx9xfqb8rg9mi2n6"))))
    (build-system python-build-system)
    (arguments
     `(;; Only compatible with Python 2.7.
       #:python ,python-2
       ;; There is no test target.
       #:tests? #f))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
1110
ddd82e0e
RW
;; MISO: Mixture-of-Isoforms model for RNA-Seq isoform quantitation.
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       ;; Compile with "gcc" rather than "cc": a call that sets the
       ;; compiler and linker executables is inserted in front of the
       ;; line in setup.py that starts with "defines".
       (snippet
        '(substitute* "setup.py"
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(;; Only Python 2 is supported.
       #:python ,python-2
       ;; There is no "test" target.
       #:tests? #f))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
1155
1e44cf8b
BW
;; OrfM: open reading frame (ORF) caller.
(define-public orfm
  (package
    (name "orfm")
    (version "0.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "05fmw145snk646ly076zby0fjav0k7ysbclck5d4s9pmgcfpijc2"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that when translated are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
1176
19ee9201
RW
;; pbcore: Python 2 APIs for PacBio data files.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "0.9.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/PacificBiosciences/pbcore/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1z46rwjac93jm87cbj2zgjg6qvsgs65140wkbbxsvxps7ai4pm09"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
1205
2c16316e
RW
;; pbtranscript-tofu: scripts for PacBio Iso-Seq transcriptome analysis,
;; built from a pinned git commit of the cDNA_primer repository.
(define-public pbtranscript-tofu
  (let ((commit "c7bbd5472"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "0.4.1." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/PacificBiosciences/cDNA_primer.git")
                      (commit commit)))
                ;; A git checkout is a directory, not a tarball, so do not
                ;; give it a ".tar.gz" name.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "148xkzi689c49g6fdhckp6mnmj2qhjdf1j4wifm6ja7ij95d7fxx"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; With standard flags, the install phase attempts to create a
         ;; zip'd egg file, and fails with an error: 'ZIP does not support
         ;; timestamps before 1980'
         #:configure-flags '("--single-version-externally-managed"
                             "--record=pbtranscript-tofu.txt")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory-and-clean-up
             (lambda _
               ;; The Python package lives in a subdirectory of the
               ;; repository.
               (chdir "pbtranscript-tofu/pbtranscript/")
               ;; Delete clutter: directories first, then single files.
               (for-each delete-file-recursively
                         '("dist/"
                           "build/"
                           "setuptools_cython-0.2.1-py2.6.egg/"
                           "pbtools.pbtranscript.egg-info"))
               (for-each delete-file
                         '("Cython-0.20.1.tar.gz"
                           "setuptools_cython-0.2.1-py2.7.egg"
                           "setuptools_cython-0.2.1.tar.gz"
                           "setup.cfg"))
               (for-each delete-file
                         (find-files "." "\\.so$"))
               ;; Files should be writable for the install phase.
               (for-each (lambda (f) (chmod f #o755))
                         (find-files "." "\\.py$"))
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'for-each'.
               #t)))))
      (native-inputs
       `(("python-nose" ,python2-nose)
         ("python-setuptools" ,python2-setuptools)))
      (inputs
       `(("python-cython" ,python2-cython)
         ("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
1264
66e3eff1
RW
;; RSEM: gene and isoform expression estimation from RNA-Seq data.
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
                       version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (list (search-patch "rsem-makefile.patch")))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of boost.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script; use this phase to keep the
         ;; bundled samtools library from being built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bindir  (string-append out "/bin/"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               ;; Copy every file matching "rsem-.*" into bin, followed by
               ;; the Perl module.
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bindir
                                                     (basename program))))
                         (find-files "." "rsem-.*"))
               (copy-file "rsem_perl_utils.pm"
                          (string-append perldir "/rsem_perl_utils.pm")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (perl5lib (string-append out "/lib/perl5/site_perl")))
               ;; Prefix PERL5LIB with the directory holding the installed
               ;; Perl module for each of these programs.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/bin/" prog)
                             `("PERL5LIB" ":" prefix (,perl5lib))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r" ,r)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
1342
8622a072
RW
;; RSeQC: RNA-seq quality control package (Python 2).
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/rseqc/"
                       version "/RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of pysam ...
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; ... and make setup.py treat pysam as already available so
             ;; that the bundled copy is not used.
             (("^have_pysam = False") "have_pysam = True")
             ;; The outdated "distribute" module is not needed either.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") ""))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
1385
4e10a221
RW
;; Samtools 1.x: utilities for SAM/BAM/CRAM alignments; also installs the
;; static libbam.a library.
(define-public samtools
  (package
    (name "samtools")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1y5p2hs4gif891b4ik20275a8xf3qrr1zh9wpysp4g8m0g1jckf2"))))
    (build-system gnu-build-system)
    (arguments
     `(;; Invalid test data causes 87 test failures on non-64-bit
       ;; architectures.  Upstream has fixed this (see
       ;; <https://github.com/samtools/samtools/pull/307>), but there has
       ;; been no new release since, so the tests are only enabled when the
       ;; system is "x86_64-linux".
       #:tests? ,(string=? (or (%current-system) (%current-target-system))
                           "x86_64-linux")
       #:make-flags (list "LIBCURSES=-lncurses"
                          (string-append "prefix="
                                         (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib")))
               (mkdir-p libdir)
               (copy-file "libbam.a"
                          (string-append libdir "/libbam.a")))))
         (add-after 'unpack 'patch-tests
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bash-program (string-append (assoc-ref inputs "bash")
                                                "/bin/bash")))
               (substitute* "test/test.pl"
                 ;; The test script calls out to /bin/bash.
                 (("/bin/bash") bash-program)
                 ;; Two upstream tests of the "stats" subcommand in
                 ;; test_usage_subcommand fail ("did not have Usage" and
                 ;; "usage did not mention samtools stats"), so skip the
                 ;; usage check for that subcommand.
                 (("(test_usage_subcommand\\(.*\\);)" cmd)
                  (string-append "unless ($subcommand eq 'stats') {"
                                 cmd "};")))))))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda 1446
0b84a0aa
RW
(define-public samtools-0.1
  ;; Latest release of the samtools 0.1 series.  Its input and output
  ;; formats differ greatly from those of samtools 1.x, and many
  ;; bioinformatics pipelines still depend on it.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     (substitute-keyword-arguments (package-arguments samtools)
       ;; There is no "check" target, so the test-patching phase
       ;; inherited from samtools is dropped as well.
       ((#:tests? tests) #f)
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (mkdir-p bindir)
                 (copy-file "samtools"
                            (string-append bindir "/samtools")))))
           (delete 'patch-tests)))))))
1474
282c5087
RW
;; NGS SDK: API for accessing Next Generation Sequencing data.
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ngs/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1x58gpm574n0xmk2a98gmikbgycq78ia0bvnb42k5ck34fmd5v8y"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "ngs-sdk" subdirectory of the unpacked source.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk") #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Things like '--enable-fast-install' are not recognized by
             ;; this 'configure' script, so call it by hand with only the
             ;; two prefix options.
             (let ((build-prefix (string-append "--build-prefix="
                                                (getcwd) "/build"))
                   (prefix (string-append "--prefix="
                                          (assoc-ref outputs "out"))))
               (zero? (system* "./configure" build-prefix prefix))))))))
    (native-inputs
     `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
1516
2651a5e6
RW
;; Java bindings for the NGS SDK; everything not overridden here is
;; inherited from ngs-sdk.
(define-public ngs-java
  (package
    (inherit ngs-sdk)
    (name "ngs-java")
    (arguments
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           ;; Build from the "ngs-java" subdirectory instead of the one
           ;; entered by the inherited phase.
           (replace 'enter-dir
             (lambda _ (chdir "ngs-java") #t))
           (add-after 'enter-dir 'fix-java-symlink-installation
             (lambda _
               ;; Replace only the version suffix, and leave the version
               ;; number in the directory name alone.  Reported here:
               ;; https://github.com/ncbi/ngs/pull/4
               (substitute* "Makefile.java"
                 (((string-append "\\$\\(subst "
                                  "(\\$\\(VERSION[^\\)]*\\)),"
                                  "(\\$\\([^\\)]+\\)),"
                                  "(\\$\\([^\\)]+\\)|\\$\\@)"
                                  "\\)")
                   _ pattern replacement target)
                  (string-append "$(patsubst "
                                 "%" pattern ","
                                 "%" replacement ","
                                 target ")")))))))))
    (inputs
     `(("jdk" ,icedtea6 "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
1552
75dd2424
RW
;; NCBI-VDB: database engine library; besides the regular installation it
;; also installs the interface libraries and headers.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.4.5-5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1cj8nk6if8sqagv20vx36v566fdvhcaadf0x1ycnbgql6chbs6vy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need
               ;; to use the Linux architecture name ("i386") instead of
               ;; the target system prefix ("i686").
               (mkdir (string-append out "/ilib"))
               (copy-recursively (string-append
                                  "build/ncbi-vdb/linux/gcc/"
                                  ,(system->linux-architecture
                                    (or (%current-target-system)
                                        (%current-system)))
                                  "/rel/ilib")
                                 (string-append out "/ilib"))
               ;; Install interface headers.
               (copy-recursively "interfaces"
                                 (string-append out "/include")))))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Only replace the version suffix, not the version number
               ;; in the directory name; fixed in commit 4dbba5c6a809 (no
               ;; release yet).
               (substitute* "setup/konfigure.perl"
                 (((string-append "\\$\\(subst "
                                  "(\\$\\(VERSION[^\\)]*\\)),"
                                  "(\\$\\([^\\)]+\\)),"
                                  "(\\$\\([^\\)]+\\)|\\$\\@)"
                                  "\\)")
                   _ pattern replacement target)
                  (string-append "$(patsubst "
                                 "%" pattern ","
                                 "%" replacement ","
                                 target ")")))

               ;; Override include path for libmagic.
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools).
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install', so it is invoked by hand.
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-ngs-java-prefix="
                                      (assoc-ref inputs "ngs-java"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("ngs-java" ,ngs-java)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
1652
51c64999
RW
;; SRA Toolkit: tools and libraries for Sequence Read Archive data, built
;; against an installed ncbi-vdb.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.4.5-5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/sra-tools/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "11nrnvz7a012f4iryf0wiwrid0h111grsfxbxa9j51h3f2xbvgns"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system wants a directory holding both the
             ;; sources and the raw build output of ncbi-vdb, including
             ;; files that are never installed.  We build against an
             ;; installed ncbi-vdb instead, hence the edits below.
             (substitute* "setup/konfigure.perl"
               ;; Let the configure script look for the "ilib" directory
               ;; of "ncbi-vdb" without first checking that a matching
               ;; library exists in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Interface libraries are found in ncbi-vdb's "ilib"
               ;; directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Things like '--enable-fast-install' are not recognized by
             ;; this 'configure' script, so run it by hand.
             (let ((ncbi-vdb (assoc-ref inputs "ncbi-vdb")))
               (zero? (system*
                       "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker
                       ;; errors
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" ncbi-vdb)
                       (string-append "--with-ncbi-vdb-build=" ncbi-vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))))))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
1727
d3517eda
RW
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    ;; Nothing is compiled: the builder only unpacks the tarball and copies
    ;; its "include" and "share" directories into the two outputs.
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar  (assoc-ref %build-inputs "tar"))
               (bzip (assoc-ref %build-inputs "bzip2"))
               (out  (assoc-ref %outputs "out"))
               (doc  (assoc-ref %outputs "doc")))
           ;; Only "tar" and "bzip2" are put on PATH for the unpack step.
           (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
           ;; Check the exit status of "tar" instead of discarding it, so
           ;; that a failed unpack is reported as such rather than showing
           ;; up later as a confusing 'chdir' error.
           (unless (zero? (system* "tar" "xvf"
                                   (assoc-ref %build-inputs "source")))
             (error "failed to unpack the seqan-library tarball"))
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", documentation to "doc".
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))
           ;; Return an explicit success value from the builder.
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5
RW
1770
(define-public star
  (package
    (name "star")
    (version "2.4.0j")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/alexdobin/STAR/archive/STAR_"
                    version ".tar.gz"))
              (sha256
               (base32
                "1y3bciych1aw6s7k8sy1saj23dcan9wk4d4f96an499slkxwz712"))
              (modules '((guix build utils)))
              ;; Replace the hard-coded "/bin/rm" in the Makefile with plain
              ;; "rm".
              (snippet
               '(substitute* "source/Makefile"
                  (("/bin/rm") "rm")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:make-flags '("STAR")
       #:phases
       (alist-cons-after
        ;; The Makefile lives in the "source" sub-directory.  Return #t
        ;; explicitly instead of relying on the unspecified value of 'chdir'.
        'unpack 'enter-source-dir
        (lambda _ (chdir "source") #t)
        (alist-replace
         ;; Custom install phase: copy the "STAR" executable into the
         ;; output's bin directory.
         'install
         (lambda* (#:key outputs #:allow-other-keys)
           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
             (mkdir-p bin)
             (copy-file "STAR" (string-append bin "STAR"))
             ;; 'copy-file' has no meaningful return value; signal success.
             #t))
         (alist-delete
          'configure %standard-phases)))))
    (native-inputs
     `(("vim" ,vim))) ; for xxd
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
de07c0db 1818
dbf4ed7c
RW
(define-public subread
  (package
    (name "subread")
    (version "1.4.6-p2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/subread/subread-"
                    version "-source.tar.gz"))
              (sha256
               (base32
                "06sv9mpcsdj6p68y15d6gi70lca3lxmzk0dn61hg0kfsa7rxmsr3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       ;; Build with the Linux-specific makefile.
       #:make-flags '("-f" "Makefile.Linux")
       #:phases
       (alist-cons-after
        ;; The build is run from the "src" sub-directory.
        'unpack 'enter-dir
        (lambda _ (chdir "src") #t)
        (alist-replace
         ;; Custom install phase: copy everything from "../bin" (relative
         ;; to "src") into the output's bin directory.
         'install
         (lambda* (#:key outputs #:allow-other-keys)
           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
             (mkdir-p bin)
             (copy-recursively "../bin" bin)
             ;; Return #t explicitly, like the 'enter-dir' phase above,
             ;; instead of the unspecified value of 'copy-recursively'.
             #t))
         ;; no "configure" script
         (alist-delete 'configure %standard-phases)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
1857
c833ab55
RW
(define-public shogun
  (package
    (name "shogun")
    (version "4.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://shogun-toolbox.org/shogun/releases/"
             (version-major+minor version)
             "/sources/shogun-" version ".tar.bz2"))
       (sha256
        (base32
         "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb"))
       (modules '((guix build utils)
                  (ice-9 rdelim)))
       (snippet
        '(begin
           ;; Remove non-free sources and files referencing them
           (for-each delete-file
                     (find-files "src/shogun/classifier/svm/"
                                 "SVMLight\\.(cpp|h)"))
           ;; The dot before "cpp" is escaped so that it only matches a
           ;; literal "." rather than any character.
           (for-each delete-file
                     (find-files "examples/undocumented/libshogun/"
                                 (string-append
                                  "(classifier_.*svmlight.*|"
                                  "evaluation_cross_validation_locked_comparison)\\.cpp")))
           ;; Remove non-free functions.
           ;;
           ;; Rewrite FILE in place, dropping every line located between a
           ;; line starting with "#ifdef USE_SVMLIGHT" and the next line
           ;; starting with "#endif //USE_SVMLIGHT".  The two delimiter
           ;; lines themselves are kept.
           (define (delete-ifdefs file)
             (with-atomic-file-replacement file
               (lambda (in out)
                 (let loop ((line (read-line in 'concat))
                            (skipping? #f))
                   (if (eof-object? line)
                       #t
                       ;; SKIP-NEXT? tells whether the line after this one
                       ;; is still inside a guarded region.
                       (let ((skip-next?
                              (or (and skipping?
                                       (not (string-prefix?
                                             "#endif //USE_SVMLIGHT" line)))
                                  (string-prefix?
                                   "#ifdef USE_SVMLIGHT" line))))
                         ;; Only lines strictly inside a guarded region
                         ;; (skipping both before and after) are dropped.
                         (unless (and skipping? skip-next?)
                           (display line out))
                         (loop (read-line in 'concat) skip-next?)))))))
           (for-each delete-ifdefs (find-files "src/shogun/kernel/"
                                               "^Kernel\\.(cpp|h)"))))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:phases
       (alist-cons-after
        'unpack 'delete-broken-symlinks
        (lambda _
          (for-each delete-file '("applications/arts/data"
                                  "applications/asp/data"
                                  "applications/easysvm/data"
                                  "applications/msplicer/data"
                                  "applications/ocr/data"
                                  "examples/documented/data"
                                  "examples/documented/matlab_static"
                                  "examples/documented/octave_static"
                                  "examples/undocumented/data"
                                  "examples/undocumented/matlab_static"
                                  "examples/undocumented/octave_static"
                                  "tests/integration/data"
                                  "tests/integration/matlab_static"
                                  "tests/integration/octave_static"
                                  "tests/integration/python_modular/tests"))
          #t)
        (alist-cons-after
         ;; Install the R interface below this package's own output.
         'unpack 'change-R-target-path
         (lambda* (#:key outputs #:allow-other-keys)
           (substitute* '("src/interfaces/r_modular/CMakeLists.txt"
                          "src/interfaces/r_static/CMakeLists.txt"
                          "examples/undocumented/r_modular/CMakeLists.txt")
             (("\\$\\{R_COMPONENT_LIB_PATH\\}")
              (string-append (assoc-ref outputs "out")
                             "/lib/R/library/")))
           #t)
         (alist-cons-after
          'unpack 'fix-octave-modules
          (lambda* (#:key outputs #:allow-other-keys)
            ;; Also search the "octave" sub-directory of the Octave include
            ;; directories.
            (substitute* '("src/interfaces/octave_modular/CMakeLists.txt"
                           "src/interfaces/octave_static/CMakeLists.txt")
              (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}")
               "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave"))

            ;; change target directory
            (substitute* "src/interfaces/octave_modular/CMakeLists.txt"
              (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}")
               (string-append (assoc-ref outputs "out")
                              "/share/octave/packages")))
            #t)
          (alist-cons-before
           'build 'set-HOME
           ;; $HOME needs to be set at some point during the build phase
           (lambda _ (setenv "HOME" "/tmp") #t)
           %standard-phases))))
       #:configure-flags
       (list "-DUSE_SVMLIGHT=OFF" ;disable proprietary SVMLIGHT
             ;;"-DJavaModular=ON" ;requires unpackaged jblas
             ;;"-DRubyModular=ON" ;requires unpackaged ruby-narray
             ;;"-DPerlModular=ON" ;"FindPerlLibs" does not exist
             ;;"-DLuaModular=ON" ;fails because lua doesn't build pkgconfig file
             "-DOctaveModular=ON"
             "-DOctaveStatic=ON"
             "-DPythonModular=ON"
             "-DPythonStatic=ON"
             "-DRModular=ON"
             "-DRStatic=ON"
             "-DCmdLineStatic=ON")))
    (inputs
     `(("python" ,python)
       ("numpy" ,python-numpy)
       ("r" ,r)
       ("octave" ,octave)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("atlas" ,atlas)
       ("arpack" ,arpack-ng)
       ("lapack" ,lapack)
       ("glpk" ,glpk)
       ("libxml2" ,libxml2)
       ("lzo" ,lzo)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://shogun-toolbox.org/")
    (synopsis "Machine learning toolbox")
    (description
     "The Shogun Machine learning toolbox provides a wide range of unified and
efficient Machine Learning (ML) methods. The toolbox seamlessly allows to
combine multiple data representations, algorithm classes, and general purpose
tools. This enables both rapid prototyping of data pipelines and extensibility
in terms of new algorithms.")
    (license license:gpl3+)))
1995
de07c0db
RW
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.12b")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/vcftools/vcftools_"
                    version ".tar.gz"))
              (sha256
               (base32
                "148al9h7f8g8my2qdnpax51kdd2yjrivlx6frvakf4lz5r8j88wx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no "check" target
       #:make-flags (list
                     "CFLAGS=-O2" ; override "-m64" flag
                     (string-append "PREFIX=" (assoc-ref %outputs "out"))
                     (string-append "MANDIR=" (assoc-ref %outputs "out")
                                    "/share/man/man1"))
       #:phases
       (alist-cons-after
        ;; The Makefile copies the man page from "${PREFIX}/cpp"; since
        ;; PREFIX is set to the output directory above, point it at the
        ;; "./cpp" directory of the source tree instead.
        'unpack 'patch-manpage-install
        (lambda _
          (substitute* "Makefile"
            (("cp \\$\\{PREFIX\\}/cpp/vcftools.1") "cp ./cpp/vcftools.1"))
          ;; Return #t explicitly rather than the unspecified value of
          ;; 'substitute*'.
          #t)
        ;; There is no "configure" phase for this package.
        (alist-delete 'configure %standard-phases))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://vcftools.sourceforge.net/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at http://vcftools.sourceforge.net/license.html
    (license license:lgpl3)))