gnu: Add r-bsgenome-dmelanogaster-ucsc-dm3-masked.
[jackhill/guix/guix.git] / gnu / packages / bioconductor.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2018 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2018 Roel Janssen <roel@gnu.org>
4 ;;; Copyright © 2018 Tobias Geerinckx-Rice <me@tobias.gr>
5 ;;;
6 ;;; This file is part of GNU Guix.
7 ;;;
8 ;;; GNU Guix is free software; you can redistribute it and/or modify it
9 ;;; under the terms of the GNU General Public License as published by
10 ;;; the Free Software Foundation; either version 3 of the License, or (at
11 ;;; your option) any later version.
12 ;;;
13 ;;; GNU Guix is distributed in the hope that it will be useful, but
14 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;;; GNU General Public License for more details.
17 ;;;
18 ;;; You should have received a copy of the GNU General Public License
19 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20
21 (define-module (gnu packages bioconductor)
22 #:use-module ((guix licenses) #:prefix license:)
23 #:use-module (guix packages)
24 #:use-module (guix download)
25 #:use-module (guix build-system r)
26 #:use-module (gnu packages)
27 #:use-module (gnu packages cran)
28 #:use-module (gnu packages compression)
29 #:use-module (gnu packages statistics)
30 #:use-module (gnu packages bioinformatics))
31
;; Annotation data package BSgenome.Dmelanogaster.UCSC.dm6 (fly genome,
;; UCSC dm6), packaged with the R build system.
(define-public r-bsgenome-dmelanogaster-ucsc-dm6
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm6")
    (version "1.4.1")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives below "data/annotation/" rather than "bioc/",
       ;; so bioconductor-uri cannot be used and the URL is spelled out.
       (uri (string-append "https://www.bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/"
                           "BSgenome.Dmelanogaster.UCSC.dm6_" version
                           ".tar.gz"))
       (sha256
        (base32 "1bhj0rdgf7lspw4xby9y9mf7v7jxxz8001bc8vw8kf04rjsx6060"))))
    (properties
     '((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm6")))
    (build-system r-build-system)
    ;; The package is little more than one very large data file, so
    ;; building substitutes for it makes no sense.
    (arguments '(#:substitutable? #f))
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm6/")
    (synopsis "Full genome sequences for Fly")
    (description
     "This package provides full genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm6) and stored in Biostrings
objects.")
    (license license:artistic2.0)))
63
;; Annotation data package BSgenome.Dmelanogaster.UCSC.dm3.masked (masked
;; fly genome, UCSC dm3).  It propagates both r-bsgenome and the unmasked
;; dm3 genome package.
(define-public r-bsgenome-dmelanogaster-ucsc-dm3-masked
  (package
    (name "r-bsgenome-dmelanogaster-ucsc-dm3-masked")
    (version "1.3.99")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/annotation/" instead of "bioc/".
              ;; Fetch over HTTPS, like the dm6 package above.
              (uri (string-append "https://www.bioconductor.org/packages/"
                                  "release/data/annotation/src/contrib/"
                                  "BSgenome.Dmelanogaster.UCSC.dm3.masked_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1756csb09f1br9rj1l3f08qyh4hlymdbd0cfn8x3fq39dn45m5ap"))))
    (properties
     `((upstream-name . "BSgenome.Dmelanogaster.UCSC.dm3.masked")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-dmelanogaster-ucsc-dm3"
        ,r-bsgenome-dmelanogaster-ucsc-dm3)))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Dmelanogaster.UCSC.dm3.masked/")
    (synopsis "Full masked genome sequences for Fly")
    (description
     "This package provides full masked genome sequences for Drosophila
melanogaster (Fly) as provided by UCSC (dm3, April 2006) and stored in
Biostrings objects. The sequences are the same as in
BSgenome.Dmelanogaster.UCSC.dm3, except that each of them has the 4 following
masks on top: (1) the mask of assembly gaps (AGAPS mask), (2) the mask of
intra-contig ambiguities (AMB mask), (3) the mask of repeats from
RepeatMasker (RM mask), and (4) the mask of repeats from Tandem Repeats
Finder (TRF mask). Only the AGAPS and AMB masks are \"active\" by default.")
    (license license:artistic2.0)))
98
;; Bioconductor package "hpar"; it declares no inputs of its own.
(define-public r-hpar
  (package
    (name "r-hpar")
    (version "1.22.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "hpar" version))
              (sha256
               (base32
                "1b72hvzasf6q739gmx6jblbzzyq22l7crrkbbfkihv3v7s94g388"))))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/hpar/")
    (synopsis "Human Protein Atlas in R")
    (description
     "This package provides a simple interface to and data from
the Human Protein Atlas project.")
    (license license:artistic2.0)))
116
;; Bioconductor package "regioneR".  The upstream name is mixed-case, so it
;; is recorded in the package properties.
(define-public r-regioner
  (package
    (name "r-regioner")
    (version "1.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "regioneR" version))
              (sha256
               (base32
                "09bzlaqdgy7wmzly3zc9y2da50d07mlixlnpaxdxpiwdk8qmhxsb"))))
    (properties '((upstream-name . "regioneR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-memoise" ,r-memoise)
       ("r-genomicranges" ,r-genomicranges)
       ("r-bsgenome" ,r-bsgenome)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/regioneR/")
    (synopsis "Association analysis of genomic regions")
    (description
     "This package offers a statistical framework based on
customizable permutation tests to assess the association between genomic
region sets and other genomic features.")
    (license license:artistic2.0)))
144
;; Bioconductor package "DiffBind".  zlib is a plain (non-propagated) input;
;; all R dependencies are propagated.
(define-public r-diffbind
  (package
    (name "r-diffbind")
    (version "2.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DiffBind" version))
       (sha256
        (base32
         "1w1hybzd732ccg3q8zhirwfilq8sx3frv1x98zfyj3svzw98fish"))))
    (properties `((upstream-name . "DiffBind")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-amap" ,r-amap)
       ("r-biocparallel" ,r-biocparallel)
       ("r-deseq2" ,r-deseq2)
       ("r-dplyr" ,r-dplyr)
       ("r-edger" ,r-edger)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gplots" ,r-gplots)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-systempiper" ,r-systempiper)
       ("r-zlibbioc" ,r-zlibbioc)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/DiffBind")
    (synopsis "Differential binding analysis of ChIP-Seq peak data")
    (description
     "This package computes differentially bound sites from multiple
ChIP-seq experiments using affinity (quantitative) data. Also enables
occupancy (overlap) analysis and plotting functions.")
    (license license:artistic2.0)))
189
;; Bioconductor package "RIPSeeker"; all dependencies are propagated R
;; packages.
(define-public r-ripseeker
  (package
    (name "r-ripseeker")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RIPSeeker" version))
       (sha256
        (base32
         "0y9cvzqslfxj3z9mnp47mknff0pky2g5x8x1z1s5yjcx35q89xfi"))))
    (properties `((upstream-name . "RIPSeeker")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-genomicranges" ,r-genomicranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-rtracklayer" ,r-rtracklayer)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/RIPSeeker")
    (synopsis
     "Identifying protein-associated transcripts from RIP-seq experiments")
    (description
     "This package infers and discriminates RIP peaks from RIP-seq alignments
using two-state HMM with negative binomial emission probability. While
RIPSeeker is specifically tailored for RIP-seq data analysis, it also provides
a suite of bioinformatics tools integrated within this self-contained software
package comprehensively addressing issues ranging from post-alignments
processing to visualization and annotation.")
    (license license:gpl2)))
222
;; Bioconductor package "multtest"; all dependencies are propagated R
;; packages.
(define-public r-multtest
  (package
    (name "r-multtest")
    (version "2.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "multtest" version))
       (sha256
        (base32
         "11949h2kglw13x8haaj4clg4jim1mwh5n98n9zxp9mmgn01z1lp0"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-survival" ,r-survival)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biobase" ,r-biobase)
       ("r-mass" ,r-mass)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/multtest")
    (synopsis "Resampling-based multiple hypothesis testing")
    (description
     "This package can do non-parametric bootstrap and permutation
resampling-based multiple testing procedures (including empirical Bayes
methods) for controlling the family-wise error rate (FWER), generalized
family-wise error rate (gFWER), tail probability of the proportion of
false positives (TPPFP), and false discovery rate (FDR). Several choices
of bootstrap-based null distribution are implemented (centered, centered
and scaled, quantile-transformed). Single-step and step-wise methods are
available. Tests based on a variety of T- and F-statistics (including
T-statistics based on regression parameters from linear and survival models
as well as those based on correlation parameters) are included. When probing
hypotheses with T-statistics, users may also select a potentially faster null
distribution which is multivariate normal with mean zero and variance
covariance matrix derived from the vector influence function. Results are
reported in terms of adjusted P-values, confidence regions and test statistic
cutoffs. The procedures are directly applicable to identifying differentially
expressed genes in DNA microarray experiments.")
    (license license:lgpl3)))
260
;; Bioconductor package "ChIPpeakAnno"; all dependencies are propagated R
;; packages.
(define-public r-chippeakanno
  (package
    (name "r-chippeakanno")
    (version "3.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ChIPpeakAnno" version))
       (sha256
        (base32
         "1kcnc3cnmrhdk1x7q3y6zsz09pgd3xn9xy1hfbxz48cajlb18ad0"))))
    (properties `((upstream-name . "ChIPpeakAnno")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-delayedarray" ,r-delayedarray)
       ("r-go-db" ,r-go-db)
       ("r-biomart" ,r-biomart)
       ("r-bsgenome" ,r-bsgenome)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-limma" ,r-limma)
       ("r-multtest" ,r-multtest)
       ("r-rbgl" ,r-rbgl)
       ("r-graph" ,r-graph)
       ("r-biocinstaller" ,r-biocinstaller)
       ("r-regioner" ,r-regioner)
       ("r-dbi" ,r-dbi)
       ("r-ensembldb" ,r-ensembldb)
       ("r-biobase" ,r-biobase)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqinr" ,r-seqinr)
       ("r-idr" ,r-idr)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rsamtools" ,r-rsamtools)
       ("r-venndiagram" ,r-venndiagram)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/ChIPpeakAnno")
    (synopsis "Peaks annotation from ChIP-seq and ChIP-chip experiments")
    (description
     "The package includes functions to retrieve the sequences around the peak,
obtain enriched Gene Ontology (GO) terms, find the nearest gene, exon, miRNA or
custom features such as most conserved elements and other transcription factor
binding sites supplied by users. Starting 2.0.5, new functions have been added
for finding the peaks with bi-directional promoters with summary statistics
(peaksNearBDP), for summarizing the occurrence of motifs in peaks
(summarizePatternInPeaks) and for adding other IDs to annotated peaks or
enrichedGO (addGeneIDs).")
    (license license:gpl2+)))
315
;; Bioconductor package "marray"; its only declared dependency is r-limma.
(define-public r-marray
  (package
    (name "r-marray")
    (version "1.58.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "marray" version))
              (sha256
               (base32 "0539flh3y1qy5b1bamkfwbskis765c5s33v1y9j51n33mxb9h08d"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/marray")
    (synopsis "Exploratory analysis for two-color spotted microarray data")
    (description "This package contains class definitions for two-color spotted
microarray data. It also includes functions for data input, diagnostic plots,
normalization and quality checking.")
    (license license:lgpl2.0+)))
334
;; Bioconductor package "CGHbase"; depends on r-biobase and r-marray.
(define-public r-cghbase
  (package
    (name "r-cghbase")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CGHbase" version))
              (sha256
               (base32 "1hf44vma3kgwr61kjbszvfxkava8bjqnam1mdncqvczbypb2xwaq"))))
    (properties `((upstream-name . "CGHbase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-marray" ,r-marray)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/CGHbase")
    (synopsis "Base functions and classes for arrayCGH data analysis")
    (description "This package contains functions and classes that are needed by
the @code{arrayCGH} packages.")
    (license license:gpl2+)))
354
;; Bioconductor package "CGHcall".  It builds on r-cghbase and further
;; propagates r-impute, r-dnacopy and r-snowfall.
(define-public r-cghcall
  (package
    (name "r-cghcall")
    (version "2.42.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "CGHcall" version))
              (sha256
               (base32 "0y71vfxv9x0am3xvv520yr95cb7m7y92dhdx1vkqki80jrmf12dz"))))
    (properties `((upstream-name . "CGHcall")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-cghbase" ,r-cghbase)
       ("r-impute" ,r-impute)
       ("r-dnacopy" ,r-dnacopy)
       ("r-snowfall" ,r-snowfall)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/CGHcall")
    ;; Synopsis and description follow the upstream CGHcall package page;
    ;; they used to be a verbatim copy of the r-cghbase texts.
    (synopsis "Calling aberrations for array CGH tumor profiles")
    (description "This package calls aberrations for array CGH data using a
six state mixture model as well as several biological concepts that are
ignored by existing algorithms. Visualization of profiles is also provided.")
    (license license:gpl2+)))
377
;; Bioconductor package "QDNAseq"; all dependencies are propagated R
;; packages, including r-dnacopy and r-cghcall, which the description
;; names for the downstream segmentation and calling steps.
(define-public r-qdnaseq
  (package
    (name "r-qdnaseq")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "QDNAseq" version))
              (sha256
               (base32 "1pj69mfyxwfd0d7h4kls9xq96sdc55y3rv20qpla50hw9libcwwd"))))
    (properties `((upstream-name . "QDNAseq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocparallel" ,r-biocparallel)
       ("r-cghbase" ,r-cghbase)
       ("r-cghcall" ,r-cghcall)
       ("r-dnacopy" ,r-dnacopy)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-r-utils" ,r-r-utils)
       ("r-rsamtools" ,r-rsamtools)))
    ;; Use HTTPS, consistent with the other home pages in this file.
    (home-page "https://bioconductor.org/packages/QDNAseq")
    (synopsis "Quantitative DNA sequencing for chromosomal aberrations")
    (description "The genome is divided into non-overlapping fixed-sized bins,
number of sequence reads in each counted, adjusted with a simultaneous
two-dimensional loess correction for sequence mappability and GC content, and
filtered to remove spurious regions in the genome. Downstream steps of
segmentation and calling are also implemented via packages DNAcopy and CGHcall,
respectively.")
    (license license:gpl2+)))
409
;; Bioconductor package "baySeq"; propagates r-abind, r-edger and
;; r-genomicranges.
(define-public r-bayseq
  (package
    (name "r-bayseq")
    (version "2.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "baySeq" version))
              (sha256
               (base32
                "0hbmm01a8libara9mbxknpk0wzarwfngnfwlmhpww91a0cmy5klg"))))
    (properties '((upstream-name . "baySeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-abind" ,r-abind)
       ("r-edger" ,r-edger)
       ("r-genomicranges" ,r-genomicranges)))
    (home-page "https://bioconductor.org/packages/baySeq/")
    (synopsis
     "Bayesian analysis of differential expression patterns in count data")
    (description
     "This package identifies differential expression in high-throughput count
data, such as that derived from next-generation sequencing machines,
calculating estimated posterior likelihoods of differential expression (or
more complex hypotheses) via empirical Bayesian methods.")
    (license license:gpl3)))