Commit | Line | Data |
---|---|---|
4e10a221 | 1 | ;;; GNU Guix --- Functional package management for GNU |
241e1221 | 2 | ;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net> |
684bf7c7 | 3 | ;;; Copyright © 2015 Ben Woodcroft <donttrustben@gmail.com> |
4e10a221 RW |
4 | ;;; |
5 | ;;; This file is part of GNU Guix. | |
6 | ;;; | |
7 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
8 | ;;; under the terms of the GNU General Public License as published by | |
9 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
10 | ;;; your option) any later version. | |
11 | ;;; | |
12 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
13 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | ;;; GNU General Public License for more details. | |
16 | ;;; | |
17 | ;;; You should have received a copy of the GNU General Public License | |
18 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
19 | ||
20 | (define-module (gnu packages bioinformatics) | |
21 | #:use-module ((guix licenses) #:prefix license:) | |
22 | #:use-module (guix packages) | |
8e913213 | 23 | #:use-module (guix utils) |
4e10a221 | 24 | #:use-module (guix download) |
2c16316e | 25 | #:use-module (guix git-download) |
4e10a221 | 26 | #:use-module (guix build-system gnu) |
d7678942 | 27 | #:use-module (guix build-system cmake) |
365c8153 | 28 | #:use-module (guix build-system perl) |
8622a072 | 29 | #:use-module (guix build-system python) |
d3517eda | 30 | #:use-module (guix build-system trivial) |
4e10a221 | 31 | #:use-module (gnu packages) |
684bf7c7 | 32 | #:use-module (gnu packages algebra) |
d3517eda | 33 | #:use-module (gnu packages base) |
e4e5a4d8 | 34 | #:use-module (gnu packages boost) |
4e10a221 | 35 | #:use-module (gnu packages compression) |
75dd2424 | 36 | #:use-module (gnu packages file) |
15a3c3d4 | 37 | #:use-module (gnu packages java) |
51c64999 | 38 | #:use-module (gnu packages linux) |
36742f43 | 39 | #:use-module (gnu packages machine-learning) |
c833ab55 | 40 | #:use-module (gnu packages maths) |
4e10a221 RW |
41 | #:use-module (gnu packages ncurses) |
42 | #:use-module (gnu packages perl) | |
43 | #:use-module (gnu packages pkg-config) | |
bfe3c685 | 44 | #:use-module (gnu packages popt) |
e4e5a4d8 | 45 | #:use-module (gnu packages protobuf) |
346a829a | 46 | #:use-module (gnu packages python) |
c833ab55 RW |
47 | #:use-module (gnu packages statistics) |
48 | #:use-module (gnu packages swig) | |
d7678942 | 49 | #:use-module (gnu packages tbb) |
2127cedb | 50 | #:use-module (gnu packages textutils) |
ce7155d5 | 51 | #:use-module (gnu packages vim) |
365c8153 | 52 | #:use-module (gnu packages web) |
c833ab55 | 53 | #:use-module (gnu packages xml) |
346a829a | 54 | #:use-module (gnu packages zip)) |
4e10a221 | 55 | |
9794180d RW |
;; BamTools: C++ API and command-line toolkit for BAM files.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/pezmaster31/bamtools/archive/v"
             version ".tar.gz"))
       ;; Store the download under a "<name>-<version>" file name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Export LDFLAGS before 'configure' so that the output's
         ;; "lib/bamtools" directory is passed to the linker as an rpath.
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (libdir (string-append out "/lib/bamtools")))
               (setenv "LDFLAGS"
                       (string-append "-Wl,-rpath=" libdir))))))))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
88 | ||
8dd4ff11 RW |
;; BEDOPS: set operations and statistics on genomic features.
;; There is no configure script; the build uses the bundled (patched)
;; copies of jansson, zlib and bzip2.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; The phase result is the conjunction of the three "tar" exit
             ;; statuses, so a failed extraction aborts the build instead of
             ;; being silently ignored.
             (and
              ;; Unpack the tarballs to benefit from shebang patching.
              (with-directory-excursion "third-party"
                (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                     (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                     (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
              (begin
                ;; Disable unpacking of tarballs in Makefile.
                (substitute* "system.mk/Makefile.linux"
                  (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                  (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                (substitute* "third-party/zlib-1.2.7/Makefile.in"
                  (("^SHELL=.*$") "SHELL=bash\n"))
                #t)))))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
146 | ||
81de5647 RW |
;; bedtools: genome arithmetic utilities.  The source ships a plain
;; Makefile, so 'configure' is dropped and 'install' is done by hand.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "16aq0w3dmbd0853j32xk9jin4vb6v6fgakfyvrsmsjizzbn3fpfl"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-makefile-SHELL-definition
           (lambda _
             ;; patch-makefile-SHELL cannot be used here as it does not
             ;; yet patch definitions with `:='.  Since changes to
             ;; patch-makefile-SHELL result in a full rebuild, features
             ;; of patch-makefile-SHELL are reimplemented here.
             (substitute* "Makefile"
               (("^SHELL := .*$")
                (string-append "SHELL := " (which "bash") " -e \n")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy everything found under the build tree's "bin"
             ;; directory into the output's "bin" directory.
             (let* ((out    (assoc-ref outputs "out"))
                    (target (string-append out "/bin/")))
               (mkdir-p target)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append target
                                                     (basename program))))
                         (find-files "bin" ".*"))))))))
    (native-inputs `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
196 | ||
a2fb1492 RW |
;; pybedtools: Python 2 wrapper around the BEDtools programs.  bedtools
;; and samtools are propagated inputs of this package.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    ;; Wording fix: "from with Python" -> "from within Python".
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations
from within Python.")
    (license license:gpl2+)))
229 | ||
85c37e29 RW |
;; Biopython for the default Python.  The Python 2 variant,
;; 'python2-biopython', is derived from this definition.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.65")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://biopython.org/DIST/biopython-"
                    version ".tar.gz"))
              (sha256
               (base32
                "13m8s9jkrw40zvdp1rl709n6lmgdh4f52aann7gzr6sfp0fwhg26"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python-numpy)))
    ;; Use the setuptools that matches this package's interpreter.  The
    ;; previous 'python2-setuptools' belongs to Python 2 and does not match
    ;; a package built with the default 'python'.
    (native-inputs
     `(("python-setuptools" ,python-setuptools)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
257 | ||
;; Python 2 variant of 'python-biopython'; the inputs are overridden to
;; refer to 'python2-numpy' explicitly.
(define-public python2-biopython
  (let ((base (package-with-python2 python-biopython)))
    (package
      (inherit base)
      (inputs
       `(("python2-numpy" ,python2-numpy))))))
262 | ||
2c7ee167 RW |
;; Bowtie 2 read aligner.  Built from a plain Makefile (no configure
;; script), with hand-written 'install' and 'check' phases.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "15dnbqippwvhyh9zqjhaxkabk7lm1xbh1nvar1x4b5kwm117zijn"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  (("^CC = .*$") "CC = gcc")
                  (("^CPP = .*$") "CPP = g++")
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))
              (patches (list (search-patch "bowtie-fix-makefile.patch")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)))
    (arguments
     '(#:make-flags '("allall")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; 'find-files' is rooted at ".", so FILE is a relative path
             ;; that is appended to BIN as is.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (for-each (lambda (file)
                           (copy-file file (string-append bin file)))
                         (find-files "." "bowtie2.*")))))
         (replace 'check
           (lambda _
             ;; 'system*' returns the exit status as an integer, and every
             ;; integer counts as true in Scheme.  Convert it to a boolean
             ;; so that a failing test run fails the phase.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
323 | ||
9a8336d8 RW |
;; BWA: Burrows-Wheeler aligner.  The binary, README and man page are
;; installed by a custom 'install' phase.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                           version ".tar.bz2"))
       (sha256
        (base32
         "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               ;; Create the three target directories, then copy one file
               ;; into each.
               (for-each mkdir-p (list bin doc man))
               (copy-file "bwa" (string-append bin "/bwa"))
               (copy-file "README.md" (string-append doc "/README.md"))
               (copy-file "bwa.1" (string-append man "/bwa.1"))))))))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
370 | ||
ad641d53 RW |
;; bx-python for Python 2.  A source snippet strips the "distribute"
;; bootstrap lines from setup.py.
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; remove dependency on outdated "distribute" module
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:tests? #f))          ;tests fail because test data are not included
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
405 | ||
810cff85 RW |
;; CLIPper: peak definition for CLIP-seq data sets (Python 2 only).
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (sha256
        (base32
         "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
       (modules '((guix build utils)))
       (snippet
        ;; remove unnecessary setup dependency
        '(substitute* "setup.py"
           (("setup_requires = .*") "")))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)    ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
443 | ||
36742f43 RW |
;; COUGER: nothing is compiled.  The "src" tree and the "couger" script
;; are copied into the output, and the script is wrapped so that it runs
;; with the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "src" (string-append out "/src"))
               (mkdir bin)
               ;; Append the output directory (the parent of "src") to
               ;; the script's sys.path ahead of its argparse import.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (copy-file "couger" (string-append bin "/couger")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((program (string-append (assoc-ref outputs "out")
                                           "/bin/couger"))
                   (path    (getenv "PYTHONPATH")))
               (wrap-program program
                 `("PYTHONPATH" ":" prefix (,path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
513 | ||
bfe3c685 RW |
;; Clustal Omega: multiple sequence aligner; a standard GNU build with
;; argtable as its only declared input.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
537 | ||
191c7101 RW |
;; CrossMap: converts genome coordinates between assemblies.  The bundled
;; pysam copy is deleted from the source and the packaged pysam is used.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.1.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "163hi5gjgij6cndxlvbkp5jjwr0k4wbm9im6d2210278q7k9kpnp"))
       ;; patch has been sent upstream already
       (patches
        (list (search-patch "crossmap-allow-system-pysam.patch")))
       (modules '((guix build utils)))
       ;; remove bundled copy of pysam
       (snippet
        '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set CROSSMAP_USE_SYSTEM_PYSAM right after unpacking.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1"))))))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
579 | ||
8e913213 RW |
;; Cutadapt: adapter trimming for sequencing reads.  The stock 'check'
;; phase is removed and a new one is added after 'install'.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/marcelm/cutadapt/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     ;; tests must be run after install
     `(#:phases
       (modify-phases %standard-phases
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The "major.minor" Python version is taken from the last
             ;; five characters of the "python" input path.
             ;; NOTE(review): this assumes the path ends in a version of
             ;; the form "X.Y.Z" with single-digit components -- confirm.
             (let* ((python      (assoc-ref inputs "python"))
                    (major+minor (string-take (string-take-right python 5)
                                              3))
                    (site        (string-append (assoc-ref outputs "out")
                                                "/lib/python" major+minor
                                                "/site-packages")))
               ;; Extend PYTHONPATH with the output's site-packages.
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH") ":" site))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
619 | ||
684bf7c7 BW |
;; DIAMOND: BLAST-compatible protein aligner.  The "bin/diamond" file
;; shipped in the archive is deleted by the source snippet; the build runs
;; inside "src" and the install phase copies "../bin/diamond".
(define-public diamond
  (package
    (name "diamond")
    (version "0.7.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0hfkcfv9f76h5brbyw9fyvmc0l9cmbsxrcdqk0fa9xv82zj47p15"))
       (snippet
        '(begin
           (delete-file "bin/diamond")
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               ;; The working directory is "src" at this point, hence
               ;; the "../" prefix.
               (copy-file "../bin/diamond"
                          (string-append bin "/diamond"))
               #t))))))
    (native-inputs
     `(("bc" ,bc)))
    (inputs
     `(("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
669 | ||
365c8153 RW |
;; Entrez Direct: only "edirect.pl" is installed, wrapped so that it runs
;; with the build-time PERL5LIB.
(define-public edirect
  (package
    (name "edirect")
    (version "2.50")
    (source
     (origin
       (method url-fetch)
       ;; Note: older versions are not retained.
       (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.zip")
       (sha256
        (base32
         "08afhz2ph66h8h381hl1mqyxkdi5nbvzsyj9gfw3jfbdijnpi4qj"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (target (string-append out "/bin")))
               (mkdir-p target)
               (copy-file "edirect.pl"
                          (string-append target "/edirect.pl"))
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl"))
                   (path   (getenv "PERL5LIB")))
               (wrap-program script
                 `("PERL5LIB" ":" prefix (,path)))))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
737 | ||
e4e5a4d8 RW |
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://bio.math.berkeley.edu/eXpress/downloads/express-"
                    version "/express-" version "-src.tgz"))
              (sha256
               (base32
                "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Turn off static Boost linking and rewrite the in-tree bamtools
         ;; paths referenced by the CMake files so that they point at the
         ;; "bamtools" input instead.
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
782 | ||
2127cedb RW |
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       ;; The release tarball URL embeds the version twice: once as the
       ;; release directory and once in the file name.
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
814 | ||
d7678942 RW |
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source (origin
              (method url-fetch)
              (uri
               (string-append "mirror://sourceforge/flexbar/"
                              version "/flexbar_v" version "_src.tgz"))
              (sha256
               (base32
                "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(;; Direct the build's binary directory to the "bin" directory of the
       ;; output; the 'install' phase is removed below.
       #:configure-flags (list
                          (string-append "-DFLEXBAR_BINARY_DIR="
                                         (assoc-ref %outputs "out")
                                         "/bin/"))
       #:phases
       (alist-replace
        'check
        (lambda* (#:key outputs #:allow-other-keys)
          ;; The validation script is run with the output's "bin" directory
          ;; at the front of PATH.
          (setenv "PATH" (string-append
                          (assoc-ref outputs "out") "/bin:"
                          (getenv "PATH")))
          ;; The unpacked source directory name embeds the version, so
          ;; compute it from the 'version' field instead of hard-coding it;
          ;; otherwise this path goes stale on every update.
          (chdir ,(string-append "../flexbar_v" version "_src/test"))
          (zero? (system* "bash" "flexbar_validate.sh")))
        (alist-delete 'install %standard-phases))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
859 | ||
5854f685 RW |
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((numpy-include
                    (string-append
                     (assoc-ref inputs "python-numpy")
                     "/lib/python2.7/site-packages/numpy/core/include/")))
               ;; Removing the shipped C files forces them to be generated
               ;; afresh from the pyx sources.
               (for-each delete-file
                         '("grit/sparsify_support_fns.c"
                           "grit/call_peaks_support_fns.c"))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Compilation needs the numpy headers on the include path.
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-include
                                 "']")))
               #t))))))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
910 | ||
346a829a RW |
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; The `popcnt' instructions are unsupported on
                      ;; architectures other than x86_64, so turn them off
                      ;; there.
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy every hisat executable found in the build tree to the
         ;; output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs
                    (find-files
                     "."
                     "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bindir program)))
                         programs))))
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX A snippet cannot be used here because zip files are not
             ;; supported.
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; Use fixed BUILD_HOST and BUILD_TIME values so that the
               ;; build is deterministic.
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env"))))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
975 | ||
85652f59 RW |
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    ;; Only Python 2 is supported.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
999 | ||
15a3c3d4 RW |
(define-public htsjdk
  (package
    (name "htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; remove build dependency on git
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system gnu-build-system)
    (arguments
     `(;; (srfi srfi-1) provides 'fold', used below to drop several phases.
       #:modules ((srfi srfi-1)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:phases (alist-replace
                 'build
                 (lambda _
                   ;; Build with ant, using the "jdk" input as JAVA_HOME and
                   ;; writing the distribution files directly into the
                   ;; output; the 'install' phase is deleted below.
                   (setenv "JAVA_HOME" (assoc-ref %build-inputs "jdk"))
                   (zero? (system* "ant" "all"
                                   (string-append "-Ddist="
                                                  (assoc-ref %outputs "out")
                                                  "/share/java/htsjdk/"))))
                 (fold alist-delete %standard-phases
                       '(configure install check)))))
    (native-inputs
     `(("ant" ,ant)
       ("jdk" ,icedtea6 "jdk")))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
1044 | ||
e7c09730 RW |
(define-public htslib
  (package
    (name "htslib")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the /bin/bash reference in the test script with the
         ;; bash found on PATH.
         (add-after 'unpack 'patch-tests
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; The modified BSD license covers the files under cram/; everything
    ;; else is under the Expat license.
    (license (list license:expat license:bsd-3))))
1079 | ||
d57e6d0f RW |
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20140616")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/M/MACS2/MACS2-"
             version ".tar.gz"))
       (sha256
        (base32
         "11lmiw6avqhwn75sn59g4lfkrr2kk20r3rgfbx9xfqb8rg9mi2n6"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ;only compatible with Python 2.7
       #:tests? #f))                    ;no test target
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
1110 | ||
ddd82e0e RW |
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       ;; Compile with "gcc" rather than "cc": a call that sets the compiler
       ;; executables is inserted in front of the "defines" line of setup.py.
       (snippet
        '(substitute* "setup.py"
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ;only Python 2 is supported
       #:tests? #f))                    ;no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
1155 | ||
1e44cf8b BW |
(define-public orfm
  (package
    (name "orfm")
    (version "0.4.1")
    (source
     (origin
       (method url-fetch)
       ;; The release directory on GitHub is the version prefixed with "v".
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "05fmw145snk646ly076zby0fjav0k7ysbclck5d4s9pmgcfpijc2"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that when translated are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (home-page "https://github.com/wwood/OrfM")
    (license license:lgpl3+)))
1176 | ||
19ee9201 RW |
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "0.9.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/PacificBiosciences/pbcore/archive/"
             version ".tar.gz"))
       ;; The archive name is just the version, so store it under a more
       ;; descriptive file name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1z46rwjac93jm87cbj2zgjg6qvsgs65140wkbbxsvxps7ai4pm09"))))
    (build-system python-build-system)
    ;; pbcore requires Python 2.7.
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
1205 | ||
2c16316e RW |
(define-public pbtranscript-tofu
  ;; The package is built from a git commit; the commit identifier is
  ;; appended to the version string.
  (let ((commit "c7bbd5472"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "0.4.1." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/PacificBiosciences/cDNA_primer.git")
                      (commit commit)))
                ;; A git checkout is a directory, not a tarball, so do not
                ;; give it a ".tar.gz" name.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "148xkzi689c49g6fdhckp6mnmj2qhjdf1j4wifm6ja7ij95d7fxx"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; With standard flags, the install phase attempts to create a zip'd
         ;; egg file, and fails with an error: 'ZIP does not support timestamps
         ;; before 1980'
         #:configure-flags '("--single-version-externally-managed"
                             "--record=pbtranscript-tofu.txt")
         #:phases
         (alist-cons-after
          'unpack 'enter-directory-and-clean-up
          (lambda _
            ;; The Python package lives in a subdirectory of the checkout.
            (chdir "pbtranscript-tofu/pbtranscript/")
            ;; Delete clutter
            (delete-file-recursively "dist/")
            (delete-file-recursively "build/")
            (delete-file-recursively "setuptools_cython-0.2.1-py2.6.egg/")
            (delete-file-recursively "pbtools.pbtranscript.egg-info")
            (delete-file "Cython-0.20.1.tar.gz")
            (delete-file "setuptools_cython-0.2.1-py2.7.egg")
            (delete-file "setuptools_cython-0.2.1.tar.gz")
            (delete-file "setup.cfg")
            ;; Remove the shared objects shipped in the checkout.
            (for-each delete-file
                      (find-files "." "\\.so$"))
            ;; files should be writable for install phase
            (for-each (lambda (f) (chmod f #o755))
                      (find-files "." "\\.py$"))
            ;; Report success explicitly, as other phases in this file do.
            #t)
          %standard-phases)))
      (inputs
       `(("python-cython" ,python2-cython)
         ("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)))
      (native-inputs
       `(("python-nose" ,python2-nose)
         ("python-setuptools" ,python2-setuptools)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
1264 | ||
66e3eff1 RW |
(define-public rsem
  (package
    (name "rsem")
    (version "1.2.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
             version ".tar.gz"))
       (sha256
        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
       (patches (list (search-patch "rsem-makefile.patch")))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The bundled copy of boost is not used; delete it.
           (delete-file-recursively "boost")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.  Use this phase to keep the
         ;; bundled samtools library from being built.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : sam/libbam.a") "all : "))
             #t))
         ;; Copy the rsem-* files to "bin" and the Perl module to the
         ;; output's site_perl directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (bindir   (string-append out "/bin/"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (for-each mkdir-p (list bindir perl-dir))
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append bindir
                                                     (basename program))))
                         (find-files "." "rsem-.*"))
               (copy-file "rsem_perl_utils.pm"
                          (string-append perl-dir "/rsem_perl_utils.pm"))
               #t)))
         ;; Wrap the listed programs so that PERL5LIB includes the directory
         ;; holding the installed rsem_perl_utils.pm.
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out      (assoc-ref outputs "out"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/bin/" prog)
                             (list "PERL5LIB" ":" 'prefix (list perl-dir))))
                         '("rsem-plot-transcript-wiggles"
                           "rsem-calculate-expression"
                           "rsem-generate-ngvector"
                           "rsem-run-ebseq"
                           "rsem-prepare-reference"))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("ncurses" ,ncurses)
       ("r" ,r)
       ("perl" ,perl)
       ("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
1342 | ||
8622a072 RW |
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           version "/RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "15ly0254yi032qzkdplg00q144qfdsd986gh62829rl5bkxhj330"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of pysam.
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; The outdated "distribute" module must not be required.
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; Mark pysam as available so that the bundled copy is not
             ;; used.
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
1385 | ||
4e10a221 RW |
(define-public samtools
  (package
    (name "samtools")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1y5p2hs4gif891b4ik20275a8xf3qrr1zh9wpysp4g8m0g1jckf2"))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are 87 test failures when building on non-64-bit architectures
       ;; due to invalid test data.  This has since been fixed upstream (see
       ;; <https://github.com/samtools/samtools/pull/307>), but as there has
       ;; not been a new release we disable the tests for all non-64-bit
       ;; systems.
       ;;
       ;; The target system must be consulted first: '%current-system' always
       ;; returns a string, so with the opposite order the target would never
       ;; be looked at and a cross build would decide based on the build
       ;; machine.
       #:tests? ,(string=? (or (%current-target-system) (%current-system))
                           "x86_64-linux")
       #:make-flags (list "LIBCURSES=-lncurses"
                          (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (alist-cons-after
        'unpack
        'patch-tests
        (lambda* (#:key inputs #:allow-other-keys)
          (let ((bash (assoc-ref inputs "bash")))
            (substitute* "test/test.pl"
              ;; The test script calls out to /bin/bash
              (("/bin/bash")
               (string-append bash "/bin/bash"))
              ;; There are two failing tests upstream relating to the "stats"
              ;; subcommand in test_usage_subcommand ("did not have Usage"
              ;; and "usage did not mention samtools stats"), so we disable
              ;; them.
              (("(test_usage_subcommand\\(.*\\);)" cmd)
               (string-append "unless ($subcommand eq 'stats') {" cmd "};")))
            #t))
        (alist-cons-after
         'install 'install-library
         (lambda* (#:key outputs #:allow-other-keys)
           ;; Also install the static library "libbam.a" from the build
           ;; directory into the output's "lib" directory.
           (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
             (mkdir-p lib)
             (copy-file "libbam.a" (string-append lib "/libbam.a"))
             #t))
         (alist-delete 'configure %standard-phases)))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
d3517eda | 1446 | |
0b84a0aa RW |
(define-public samtools-0.1
  ;; Most recent release in the 0.1 series of samtools.  Many bioinformatics
  ;; pipelines still rely on it because its input and output formats differ
  ;; greatly from those used and produced by samtools 1.x.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     (substitute-keyword-arguments (package-arguments samtools)
       ((#:tests? inherited-tests?) #f) ;no "check" target
       ((#:phases inherited-phases)
        `(modify-phases ,inherited-phases
           ;; Install by copying the "samtools" executable into the output's
           ;; "bin" directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (copy-file "samtools"
                            (string-append bindir "/samtools")))))
           (delete 'patch-tests)))))))
1474 | ||
282c5087 RW |
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "1.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/ngs/archive/"
                           version ".tar.gz"))
       ;; Store the tarball under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1x58gpm574n0xmk2a98gmikbgycq78ia0bvnb42k5ck34fmd5v8y"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       ;; Enter the "ngs-sdk" sub-directory right after unpacking, and run
       ;; the 'configure' script by hand.
       (alist-cons-after
        'unpack 'enter-dir
        (lambda _ (chdir "ngs-sdk") #t)
        (alist-replace
         'configure
         (lambda* (#:key outputs #:allow-other-keys)
           ;; The 'configure' script doesn't recognize things like
           ;; '--enable-fast-install', so pass only the options it knows.
           (let ((status
                  (system* "./configure"
                           (string-append "--build-prefix=" (getcwd) "/build")
                           (string-append "--prefix="
                                          (assoc-ref outputs "out")))))
             (zero? status)))
         %standard-phases))))
    (native-inputs `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
1516 | ||
2651a5e6 RW |
(define-public ngs-java
  ;; Java bindings built from the "ngs-java" sub-directory of the same source
  ;; as 'ngs-sdk'.
  (package (inherit ngs-sdk)
    (name "ngs-java")
    (arguments
     ;; Start from the arguments of 'ngs-sdk', add the build-side modules,
     ;; and rewrite its phases.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases sdk-phases)
        `(alist-cons-after
          'enter-dir 'fix-java-symlink-installation
          (lambda _
            ;; Only replace the version suffix, not the version number in
            ;; the directory name.  Reported here:
            ;; https://github.com/ncbi/ngs/pull/4
            (substitute* "Makefile.java"
              (((string-append "\\$\\(subst "
                               "(\\$\\(VERSION[^\\)]*\\)),"
                               "(\\$\\([^\\)]+\\)),"
                               "(\\$\\([^\\)]+\\)|\\$\\@)"
                               "\\)")
                _ suffix new-suffix subject)
               ;; Turn "$(subst A,B,C)" into "$(patsubst %A,%B,C)".
               (string-append "$(patsubst %" suffix
                              ",%" new-suffix
                              "," subject ")"))))
          ;; Enter "ngs-java" instead of "ngs-sdk".
          (alist-replace
           'enter-dir (lambda _ (chdir "ngs-java") #t)
           ,sdk-phases)))))
    (inputs
     `(("jdk" ,icedtea6 "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
1552 | ||
75dd2424 RW |
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.4.5-5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
                       version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1cj8nk6if8sqagv20vx36v566fdvhcaadf0x1ycnbgql6chbs6vy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:phases
       (alist-replace
        'configure
        ;; Patch the build scripts, then run the 'configure' script by hand.
        ;; The phase succeeds when 'configure' exits with status zero.
        (lambda* (#:key inputs outputs #:allow-other-keys)
          (let ((out (assoc-ref outputs "out")))
            ;; Only replace the version suffix, not the version number in the
            ;; directory name; fixed in commit 4dbba5c6a809 (no release yet).
            (substitute* "setup/konfigure.perl"
              (((string-append "\\$\\(subst "
                               "(\\$\\(VERSION[^\\)]*\\)),"
                               "(\\$\\([^\\)]+\\)),"
                               "(\\$\\([^\\)]+\\)|\\$\\@)"
                               "\\)")
                _ pattern replacement target)
               (string-append "$(patsubst "
                              "%" pattern ","
                              "%" replacement ","
                              target ")")))

            ;; Override include path for libmagic
            (substitute* "setup/package.prl"
              (("name => 'magic', Include => '/usr/include'")
               (string-append "name=> 'magic', Include => '"
                              (assoc-ref inputs "libmagic")
                              "/include" "'")))

            ;; Install kdf5 library (needed by sra-tools)
            (substitute* "build/Makefile.install"
              (("LIBRARIES_TO_INSTALL =")
               "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

            ;; The 'configure' script doesn't recognize things like
            ;; '--enable-fast-install'.
            (zero? (system*
                    "./configure"
                    (string-append "--build-prefix=" (getcwd) "/build")
                    (string-append "--prefix=" out)
                    "--debug"
                    (string-append "--with-xml2-prefix="
                                   (assoc-ref inputs "libxml2"))
                    (string-append "--with-ngs-sdk-prefix="
                                   (assoc-ref inputs "ngs-sdk"))
                    (string-append "--with-ngs-java-prefix="
                                   (assoc-ref inputs "ngs-java"))
                    (string-append "--with-hdf5-prefix="
                                   (assoc-ref inputs "hdf5"))))))
        (alist-cons-after
         'install 'install-interfaces
         (lambda* (#:key outputs #:allow-other-keys)
           (let ((out (assoc-ref outputs "out")))
             ;; Install interface libraries.  On i686 the interface libraries
             ;; are installed to "linux/gcc/i386", so we need to use the Linux
             ;; architecture name ("i386") instead of the target system prefix
             ;; ("i686").
             (mkdir (string-append out "/ilib"))
             (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                              ,(system->linux-architecture
                                                (or (%current-target-system)
                                                    (%current-system)))
                                              "/rel/ilib")
                               (string-append out "/ilib"))
             ;; Install interface headers
             (copy-recursively "interfaces"
                               (string-append out "/include"))
             ;; Report success explicitly instead of relying on the
             ;; unspecified return value of 'copy-recursively'.
             #t))
         %standard-phases))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("ngs-java" ,ngs-java)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
1652 | ||
51c64999 RW |
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.4.5-5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ncbi/sra-tools/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "11nrnvz7a012f4iryf0wiwrid0h111grsfxbxa9j51h3f2xbvgns"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (alist-replace
        'configure
        (lambda* (#:key inputs outputs #:allow-other-keys)
          ;; The build system expects a directory containing the sources and
          ;; raw build output of ncbi-vdb, including files that are not
          ;; installed.  Since we are building against an installed version
          ;; of ncbi-vdb, the following modifications are needed.
          ;; NOTE(review): the first pattern starts with literal leading
          ;; whitespace; confirm it matches the indentation used in
          ;; "setup/konfigure.perl".
          (substitute* "setup/konfigure.perl"
            ;; Make the configure script look for the "ilib" directory of
            ;; "ncbi-vdb" without first checking for the existence of a
            ;; matching library in its "lib" directory.
            (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
             "my $f = File::Spec->catdir($ilibdir, $ilib);")
            ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
            (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
             "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

          ;; The 'configure' script doesn't recognize things like
          ;; '--enable-fast-install'.
          (let ((option
                 ;; Build "OPTION=DIRECTORY" for the input named LABEL.
                 (lambda (flag label)
                   (string-append flag (assoc-ref inputs label)))))
            (zero? (system*
                    "./configure"
                    (string-append "--build-prefix=" (getcwd) "/build")
                    (string-append "--prefix=" (assoc-ref outputs "out"))
                    "--debug"
                    (option "--with-fuse-prefix=" "fuse")
                    (option "--with-magic-prefix=" "libmagic")
                    ;; TODO: building with libxml2 fails with linker errors
                    ;; (option "--with-xml2-prefix=" "libxml2")
                    (option "--with-ncbi-vdb-sources=" "ncbi-vdb")
                    (option "--with-ncbi-vdb-build=" "ncbi-vdb")
                    (option "--with-ngs-sdk-prefix=" "ngs-sdk")
                    (option "--with-hdf5-prefix=" "hdf5")))))
        %standard-phases)))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5)
       ("zlib" ,zlib)))
    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
1727 | ||
d3517eda RW |
(define-public seqan
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball   (assoc-ref %build-inputs "source"))
                (tar-dir   (assoc-ref %build-inputs "tar"))
                (bzip2-dir (assoc-ref %build-inputs "bzip2"))
                (headers   (string-append (assoc-ref %outputs "out")
                                          "/include"))
                (docs      (string-append (assoc-ref %outputs "doc")
                                          "/share")))
           ;; 'tar' and 'bzip2' must be on PATH to unpack the tarball.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip2-dir "/bin"))
           (system* "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Nothing is compiled: the headers go to "out" and the "share"
           ;; directory goes to "doc".
           (copy-recursively "include" headers)
           (copy-recursively "share" docs)))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
ce7155d5 RW |
1770 | |
(define-public star
  (package
    (name "star")
    (version "2.4.0j")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/alexdobin/STAR/archive/STAR_"
                    version ".tar.gz"))
              (sha256
               (base32
                "1y3bciych1aw6s7k8sy1saj23dcan9wk4d4f96an499slkxwz712"))
              (modules '((guix build utils)))
              ;; Call "rm" by name rather than by the absolute file name
              ;; "/bin/rm".
              (snippet
               '(substitute* "source/Makefile"
                  (("/bin/rm") "rm")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (alist-cons-after
        ;; The Makefile is in the "source" sub-directory.
        'unpack 'enter-source-dir (lambda _ (chdir "source"))
        (alist-replace
         'install
         ;; Install by copying the "STAR" executable into "bin".
         (lambda* (#:key outputs #:allow-other-keys)
           (let* ((out    (assoc-ref outputs "out"))
                  (bindir (string-append out "/bin/")))
             (mkdir-p bindir)
             (copy-file "STAR" (string-append bindir "STAR"))))
         (alist-delete
          'configure %standard-phases)))))
    (native-inputs
     `(("vim" ,vim)))                   ; for xxd
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
de07c0db | 1818 | |
dbf4ed7c RW |
(define-public subread
  (package
    (name "subread")
    (version "1.4.6-p2")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "-source.tar.gz"))
              (sha256
               (base32
                "06sv9mpcsdj6p68y15d6gi70lca3lxmzk0dn61hg0kfsa7rxmsr3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags '("-f" "Makefile.Linux")
       #:phases
       (alist-cons-after
        ;; Build from within the "src" sub-directory.
        'unpack 'enter-dir
        (lambda _ (chdir "src") #t)
        (alist-replace
         'install
         ;; Install by copying the contents of "../bin" into the output's
         ;; "bin" directory.
         (lambda* (#:key outputs #:allow-other-keys)
           (let* ((out    (assoc-ref outputs "out"))
                  (bindir (string-append out "/bin/")))
             (mkdir-p bindir)
             (copy-recursively "../bin" bindir)))
         ;; no "configure" script
         (alist-delete 'configure %standard-phases)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
1857 | ||
c833ab55 RW |
(define-public shogun
  (package
    (name "shogun")
    (version "4.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://shogun-toolbox.org/shogun/releases/"
             (version-major+minor version)
             "/sources/shogun-" version ".tar.bz2"))
       (sha256
        (base32
         "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb"))
       (modules '((guix build utils)
                  (ice-9 rdelim)))
       (snippet
        '(begin
           ;; Remove non-free sources and files referencing them
           (for-each delete-file
                     (find-files "src/shogun/classifier/svm/"
                                 "SVMLight\\.(cpp|h)"))
           ;; The dot before "cpp" is escaped so that it only matches a
           ;; literal ".", like in the pattern above.
           (for-each delete-file
                     (find-files "examples/undocumented/libshogun/"
                                 (string-append
                                  "(classifier_.*svmlight.*|"
                                  "evaluation_cross_validation_locked_comparison)\\.cpp")))
           ;; Remove non-free functions.
           ;; Rewrite FILE, dropping the lines between "#ifdef USE_SVMLIGHT"
           ;; and "#endif //USE_SVMLIGHT".  The two marker lines themselves
           ;; are kept.
           (define (delete-ifdefs file)
             (with-atomic-file-replacement file
               (lambda (in out)
                 (let loop ((line (read-line in 'concat))
                            (skipping? #f))
                   (if (eof-object? line)
                       #t
                       ;; SKIP-NEXT? tells whether the line after LINE lies
                       ;; inside a guarded region.
                       (let ((skip-next?
                              (or (and skipping?
                                       (not (string-prefix?
                                             "#endif //USE_SVMLIGHT" line)))
                                  (string-prefix?
                                   "#ifdef USE_SVMLIGHT" line))))
                         ;; Write LINE when it is outside a guarded region
                         ;; or when it is the closing "#endif" marker.
                         (when (or (not skipping?)
                                   (not skip-next?))
                           (display line out))
                         (loop (read-line in 'concat) skip-next?)))))))
           (for-each delete-ifdefs (find-files "src/shogun/kernel/"
                                               "^Kernel\\.(cpp|h)"))))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f ;no check target
       #:phases
       (alist-cons-after
        'unpack 'delete-broken-symlinks
        (lambda _
          (for-each delete-file '("applications/arts/data"
                                  "applications/asp/data"
                                  "applications/easysvm/data"
                                  "applications/msplicer/data"
                                  "applications/ocr/data"
                                  "examples/documented/data"
                                  "examples/documented/matlab_static"
                                  "examples/documented/octave_static"
                                  "examples/undocumented/data"
                                  "examples/undocumented/matlab_static"
                                  "examples/undocumented/octave_static"
                                  "tests/integration/data"
                                  "tests/integration/matlab_static"
                                  "tests/integration/octave_static"
                                  "tests/integration/python_modular/tests"))
          #t)
        (alist-cons-after
         'unpack 'change-R-target-path
         ;; Install the R interface under the package's own output.
         (lambda* (#:key outputs #:allow-other-keys)
           (substitute* '("src/interfaces/r_modular/CMakeLists.txt"
                          "src/interfaces/r_static/CMakeLists.txt"
                          "examples/undocumented/r_modular/CMakeLists.txt")
             (("\\$\\{R_COMPONENT_LIB_PATH\\}")
              (string-append (assoc-ref outputs "out")
                             "/lib/R/library/")))
           #t)
         (alist-cons-after
          'unpack 'fix-octave-modules
          (lambda* (#:key outputs #:allow-other-keys)
            ;; Also search the "octave" sub-directory of the Octave include
            ;; directories.
            (substitute* '("src/interfaces/octave_modular/CMakeLists.txt"
                           "src/interfaces/octave_static/CMakeLists.txt")
              (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}")
               "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave"))

            ;; change target directory
            (substitute* "src/interfaces/octave_modular/CMakeLists.txt"
              (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}")
               (string-append (assoc-ref outputs "out")
                              "/share/octave/packages")))
            #t)
          (alist-cons-before
           'build 'set-HOME
           ;; $HOME needs to be set at some point during the build phase
           (lambda _ (setenv "HOME" "/tmp") #t)
           %standard-phases))))
       #:configure-flags
       (list "-DUSE_SVMLIGHT=OFF" ;disable proprietary SVMLIGHT
             ;;"-DJavaModular=ON" ;requires unpackaged jblas
             ;;"-DRubyModular=ON" ;requires unpackaged ruby-narray
             ;;"-DPerlModular=ON" ;"FindPerlLibs" does not exist
             ;;"-DLuaModular=ON"  ;fails because lua doesn't build pkgconfig file
             "-DOctaveModular=ON"
             "-DOctaveStatic=ON"
             "-DPythonModular=ON"
             "-DPythonStatic=ON"
             "-DRModular=ON"
             "-DRStatic=ON"
             "-DCmdLineStatic=ON")))
    (inputs
     `(("python" ,python)
       ("numpy" ,python-numpy)
       ("r" ,r)
       ("octave" ,octave)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("atlas" ,atlas)
       ("arpack" ,arpack-ng)
       ("lapack" ,lapack)
       ("glpk" ,glpk)
       ("libxml2" ,libxml2)
       ("lzo" ,lzo)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "http://shogun-toolbox.org/")
    (synopsis "Machine learning toolbox")
    (description
     "The Shogun Machine learning toolbox provides a wide range of unified and
efficient Machine Learning (ML) methods. The toolbox seamlessly allows to
combine multiple data representations, algorithm classes, and general purpose
tools. This enables both rapid prototyping of data pipelines and extensibility
in terms of new algorithms.")
    (license license:gpl3+)))
1995 | ||
de07c0db RW |
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.12b")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/vcftools/vcftools_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "148al9h7f8g8my2qdnpax51kdd2yjrivlx6frvakf4lz5r8j88wx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))
       #:phases
       (alist-cons-after
        'unpack 'patch-manpage-install
        ;; Make the Makefile copy the manual page from "./cpp" rather than
        ;; from "${PREFIX}/cpp".
        (lambda _
          (substitute* "Makefile"
            (("cp \\$\\{PREFIX\\}/cpp/vcftools.1") "cp ./cpp/vcftools.1")))
        ;; The 'configure' phase is not used by this package.
        (alist-delete 'configure %standard-phases))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://vcftools.sourceforge.net/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at http://vcftools.sourceforge.net/license.html
    (license license:lgpl3)))