gnu: r-bsgenome-celegans-ucsc-ce6: Move to (gnu packages bioconductor).
[jackhill/guix/guix.git] / gnu / packages / bioconductor.scm
index 37ac941..24504d0 100644 (file)
@@ -1,7 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
-;;; Copyright © 2018 Roel Janssen <roel@gnu.org>
-;;; Copyright © 2018 Tobias Geerinckx-Rice <me@tobias.gr>
+;;; Copyright © 2016, 2017, 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2016, 2017, 2018 Roel Janssen <roel@gnu.org>
+;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
   #:use-module (gnu packages bioinformatics)
   #:use-module (gnu packages cran)
   #:use-module (gnu packages compression)
+  #:use-module (gnu packages gcc)
   #:use-module (gnu packages graph)
   #:use-module (gnu packages maths)
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages web))
 
+;; Package for the upstream tarball BSgenome.Celegans.UCSC.ce6, built with
+;; r-build-system and propagating r-bsgenome.
+(define-public r-bsgenome-celegans-ucsc-ce6
+  (package
+    (name "r-bsgenome-celegans-ucsc-ce6")
+    (version "1.4.0")
+    (source
+     (origin
+       (method url-fetch)
+       ;; bioconductor-uri cannot be used: this tarball lives under
+       ;; "data/annotation/" rather than "bioc/", so the URL is spelled out.
+       (uri (string-append
+             "https://www.bioconductor.org/packages/"
+             "release/data/annotation/src/contrib/"
+             "BSgenome.Celegans.UCSC.ce6_"
+             version ".tar.gz"))
+       (sha256
+        (base32 "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
+    (properties '((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
+    (build-system r-build-system)
+    ;; Substitutes are switched off because the package is little more than
+    ;; one very large data file.
+    (arguments (list #:substitutable? #f))
+    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
+    (home-page
+     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
+    (synopsis "Full genome sequences for Worm")
+    (description
+     "This package provides full genome sequences for Caenorhabditis
+elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
+objects.")
+    (license license:artistic2.0)))
+
 (define-public r-bsgenome-dmelanogaster-ucsc-dm6
   (package
     (name "r-bsgenome-dmelanogaster-ucsc-dm6")
@@ -99,6 +132,37 @@ RepeatMasker (RM mask), and (4) the mask of repeats from Tandem Repeats
 Finder (TRF mask).  Only the AGAPS and AMB masks are \"active\" by default.")
     (license license:artistic2.0)))
 
+;; Package for the upstream tarball BSgenome.Hsapiens.1000genomes.hs37d5,
+;; built with r-build-system and propagating r-bsgenome.
+(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
+  (package
+    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
+    (version "0.99.1")
+    (source
+     (origin
+       (method url-fetch)
+       ;; bioconductor-uri cannot be used: this tarball lives under
+       ;; "data/annotation/" rather than "bioc/", so the URL is spelled out.
+       (uri (string-append
+             "https://www.bioconductor.org/packages/"
+             "release/data/annotation/src/contrib/"
+             "BSgenome.Hsapiens.1000genomes.hs37d5_"
+             version ".tar.gz"))
+       (sha256
+        (base32 "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
+    (properties '((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
+    (build-system r-build-system)
+    ;; Substitutes are switched off because the package is little more than
+    ;; one very large data file.
+    (arguments (list #:substitutable? #f))
+    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
+    (home-page
+     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
+    (synopsis "Full genome sequences for Homo sapiens")
+    (description
+     "This package provides full genome sequences for Homo sapiens from
+1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
+    (license license:artistic2.0)))
+
 (define-public r-bsgenome-hsapiens-ucsc-hg19-masked
   (package
     (name "r-bsgenome-hsapiens-ucsc-hg19-masked")
@@ -134,6 +198,37 @@ Repeats Finder (TRF mask).  Only the AGAPS and AMB masks are \"active\" by
 default.")
     (license license:artistic2.0)))
 
+;; Package for the upstream tarball BSgenome.Mmusculus.UCSC.mm9, built with
+;; r-build-system and propagating r-bsgenome.
+(define-public r-bsgenome-mmusculus-ucsc-mm9
+  (package
+    (name "r-bsgenome-mmusculus-ucsc-mm9")
+    (version "1.4.0")
+    (source
+     (origin
+       (method url-fetch)
+       ;; bioconductor-uri cannot be used: this tarball lives under
+       ;; "data/annotation/" rather than "bioc/", so the URL is spelled out.
+       (uri (string-append
+             "https://www.bioconductor.org/packages/"
+             "release/data/annotation/src/contrib/"
+             "BSgenome.Mmusculus.UCSC.mm9_"
+             version ".tar.gz"))
+       (sha256
+        (base32 "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
+    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
+    (build-system r-build-system)
+    ;; Substitutes are switched off because the package is little more than
+    ;; one very large data file.
+    (arguments (list #:substitutable? #f))
+    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
+    (home-page
+     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
+    (synopsis "Full genome sequences for Mouse")
+    (description
+     "This package provides full genome sequences for Mus musculus (Mouse) as
+provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
+    (license license:artistic2.0)))
+
 (define-public r-bsgenome-mmusculus-ucsc-mm9-masked
   (package
     (name "r-bsgenome-mmusculus-ucsc-mm9-masked")
@@ -169,6 +264,95 @@ Repeats Finder (TRF mask).  Only the AGAPS and AMB masks are \"active\" by
 default."  )
     (license license:artistic2.0)))
 
+;; Package for the upstream tarball BSgenome.Mmusculus.UCSC.mm10, built with
+;; r-build-system and propagating r-bsgenome.
+(define-public r-bsgenome-mmusculus-ucsc-mm10
+  (package
+    (name "r-bsgenome-mmusculus-ucsc-mm10")
+    (version "1.4.0")
+    (source
+     (origin
+       (method url-fetch)
+       ;; bioconductor-uri cannot be used: this tarball lives under
+       ;; "data/annotation/" rather than "bioc/", so the URL is spelled out.
+       (uri (string-append
+             "https://www.bioconductor.org/packages/"
+             "release/data/annotation/src/contrib/"
+             "BSgenome.Mmusculus.UCSC.mm10_"
+             version ".tar.gz"))
+       (sha256
+        (base32 "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
+    (properties '((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
+    (build-system r-build-system)
+    ;; Substitutes are switched off because the package is little more than
+    ;; one very large data file.
+    (arguments (list #:substitutable? #f))
+    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
+    (home-page
+     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
+    (synopsis "Full genome sequences for Mouse")
+    (description
+     "This package provides full genome sequences for Mus
+musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
+in Biostrings objects.")
+    (license license:artistic2.0)))
+
+;; Package for the upstream tarball org.Dr.eg.db, built with r-build-system
+;; and propagating r-annotationdbi.
+(define-public r-org-dr-eg-db
+  (package
+    (name "r-org-dr-eg-db")
+    (version "3.7.0")
+    (source
+     (origin
+       (method url-fetch)
+       ;; bioconductor-uri cannot be used: this tarball lives under
+       ;; "data/annotation/" rather than "bioc/", so the URL is spelled out.
+       (uri (string-append
+             "https://www.bioconductor.org/packages/"
+             "release/data/annotation/src/contrib/"
+             "org.Dr.eg.db_" version ".tar.gz"))
+       (sha256
+        (base32 "1xs5wsbcpy0iwbjyiv7fax57djqc529ai5fk1qfsdcvlja3cpglx"))))
+    (properties '((upstream-name . "org.Dr.eg.db")))
+    (build-system r-build-system)
+    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)))
+    (home-page "https://www.bioconductor.org/packages/org.Dr.eg.db/")
+    (synopsis "Annotation for Zebrafish")
+    (description
+     "This package provides genome wide annotations for Zebrafish, primarily
+based on mapping using Entrez Gene identifiers.")
+    (license license:artistic2.0)))
+
+;; Package for the upstream tarball BSgenome.Hsapiens.UCSC.hg19, built with
+;; r-build-system and propagating r-bsgenome.
+(define-public r-bsgenome-hsapiens-ucsc-hg19
+  (package
+    (name "r-bsgenome-hsapiens-ucsc-hg19")
+    (version "1.4.0")
+    (source
+     (origin
+       (method url-fetch)
+       ;; bioconductor-uri cannot be used: this tarball lives under
+       ;; "data/annotation/" rather than "bioc/", so the URL is spelled out.
+       (uri (string-append
+             "https://www.bioconductor.org/packages/"
+             "release/data/annotation/src/contrib/"
+             "BSgenome.Hsapiens.UCSC.hg19_"
+             version ".tar.gz"))
+       (sha256
+        (base32 "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
+    (properties '((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
+    (build-system r-build-system)
+    ;; Substitutes are switched off because the package is little more than
+    ;; one very large data file.
+    (arguments (list #:substitutable? #f))
+    (propagated-inputs `(("r-bsgenome" ,r-bsgenome)))
+    (home-page
+     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
+    (synopsis "Full genome sequences for Homo sapiens")
+    (description
+     "This package provides full genome sequences for Homo sapiens as provided
+by UCSC (hg19, February 2009) and stored in Biostrings objects.")
+    (license license:artistic2.0)))
+
 (define-public r-genelendatabase
   (package
     (name "r-genelendatabase")
@@ -197,6 +381,38 @@ default."  )
 genomes and gene ID formats, largely based on the UCSC table browser.")
     (license license:lgpl2.0+)))
 
+;; Package for the upstream tarball TxDb.Hsapiens.UCSC.hg19.knownGene, built
+;; with r-build-system and propagating r-genomicfeatures.
+(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
+  (package
+    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
+    (version "3.2.2")
+    (source
+     (origin
+       (method url-fetch)
+       ;; bioconductor-uri cannot be used: this tarball lives under
+       ;; "data/annotation/" rather than "bioc/", so the URL is spelled out.
+       (uri (string-append
+             "https://bioconductor.org/packages/"
+             "release/data/annotation/src/contrib"
+             "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
+             version ".tar.gz"))
+       (sha256
+        (base32 "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
+    (properties '((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
+    (build-system r-build-system)
+    ;; Substitutes are switched off because the package is little more than
+    ;; one very large data file.
+    (arguments (list #:substitutable? #f))
+    (propagated-inputs `(("r-genomicfeatures" ,r-genomicfeatures)))
+    (home-page
+     "https://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
+    (synopsis "Annotation package for human genome in TxDb format")
+    (description
+     "This package provides an annotation database of Homo sapiens genome
+data.  It is derived from the UCSC hg19 genome and based on the \"knownGene\"
+track.  The database is exposed as a @code{TxDb} object.")
+    (license license:artistic2.0)))
+
 (define-public r-txdb-mmusculus-ucsc-mm9-knowngene
   (package
     (name "r-txdb-mmusculus-ucsc-mm9-knowngene")
@@ -228,6 +444,53 @@ database is exposed as a @code{TxDb} object.")
     (license license:artistic2.0)))
 
 \f
+;; Package for upstream BiocGenerics, fetched via bioconductor-uri and built
+;; with r-build-system; it declares no inputs.
+(define-public r-biocgenerics
+  (package
+    (name "r-biocgenerics")
+    (version "0.28.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (bioconductor-uri "BiocGenerics" version))
+       (sha256
+        (base32 "0cvpsrhg7sn7lpqgxvqrsagv6j7xj5rafq5xdjfd8zc4gxrs5rb8"))))
+    (properties '((upstream-name . "BiocGenerics")))
+    (build-system r-build-system)
+    (home-page "https://bioconductor.org/packages/BiocGenerics")
+    (synopsis "S4 generic functions for Bioconductor")
+    (description
+     "This package provides S4 generic functions needed by many Bioconductor
+packages.")
+    (license license:artistic2.0)))
+
+;; Package for upstream "annotate", fetched via bioconductor-uri and built
+;; with r-build-system.
+(define-public r-annotate
+  (package
+    (name "r-annotate")
+    (version "1.60.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "annotate" version))
+              (sha256
+               (base32
+                "0p6c96lay23a67dyirgnwzm2yw22m592z780vy6p4nqwla8ha18n"))))
+    (build-system r-build-system)
+    ;; Every dependency is propagated.
+    (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi)
+                         ("r-biobase" ,r-biobase)
+                         ("r-biocgenerics" ,r-biocgenerics)
+                         ("r-dbi" ,r-dbi)
+                         ("r-rcurl" ,r-rcurl)
+                         ("r-xml" ,r-xml)
+                         ("r-xtable" ,r-xtable)))
+    (home-page "https://bioconductor.org/packages/annotate")
+    (synopsis "Annotation for microarrays")
+    (description
+     "This package provides R environments for the annotation of
+microarrays.")
+    (license license:artistic2.0)))
+
 (define-public r-hpar
   (package
     (name "r-hpar")
@@ -275,6 +538,31 @@ customizable permutation tests to assess the association between genomic
 region sets and other genomic features.")
     (license license:artistic2.0)))
 
+;; Package for upstream "geneplotter", fetched via bioconductor-uri and built
+;; with r-build-system.
+(define-public r-geneplotter
+  (package
+    (name "r-geneplotter")
+    (version "1.60.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "geneplotter" version))
+              (sha256
+               (base32
+                "10khr0pznxf3m0f5gzck9ymljrwcv3vamfmpskd51yjh36lhllqz"))))
+    (build-system r-build-system)
+    ;; Every dependency is propagated.
+    (propagated-inputs `(("r-annotate" ,r-annotate)
+                         ("r-annotationdbi" ,r-annotationdbi)
+                         ("r-biobase" ,r-biobase)
+                         ("r-biocgenerics" ,r-biocgenerics)
+                         ("r-lattice" ,r-lattice)
+                         ("r-rcolorbrewer" ,r-rcolorbrewer)))
+    (home-page "https://bioconductor.org/packages/geneplotter")
+    (synopsis "Graphics functions for genomic data")
+    (description "This package provides functions for plotting genomic data.")
+    (license license:artistic2.0)))
+
 (define-public r-diffbind
   (package
     (name "r-diffbind")
@@ -1129,3 +1417,77 @@ Viewer (SAV) files, access data, and generate QC plots.")
      "This package provides a quality control pipeline for ChIP-exo/nexus
 sequencing data.")
     (license license:gpl2+)))
+
+;; Package for upstream "copynumber", fetched via bioconductor-uri and built
+;; with r-build-system.
+(define-public r-copynumber
+  (package
+    (name "r-copynumber")
+    (version "1.22.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (bioconductor-uri "copynumber" version))
+       (sha256
+        (base32 "0ipwj9i5p1bwhg5d80jdjagm02krpj2v0j47qdgw41h8wncdyal3"))))
+    (build-system r-build-system)
+    ;; Every dependency is propagated; the listing order is kept as is.
+    (propagated-inputs `(("r-s4vectors" ,r-s4vectors)
+                         ("r-iranges" ,r-iranges)
+                         ("r-genomicranges" ,r-genomicranges)
+                         ("r-biocgenerics" ,r-biocgenerics)))
+    (home-page "https://bioconductor.org/packages/copynumber")
+    (synopsis "Segmentation of single- and multi-track copy number data")
+    (description
+     "This package segments single- and multi-track copy number data by a
+penalized least squares regression method.")
+    (license license:artistic2.0)))
+
+;; Package for upstream "DNAcopy", fetched via bioconductor-uri and built
+;; with r-build-system.
+(define-public r-dnacopy
+  (package
+    (name "r-dnacopy")
+    (version "1.56.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "DNAcopy" version))
+              (sha256
+               (base32
+                "04cqdqxhva66xwh1s2vffi56b9fcrqd4slcrvqasj5lp2rkjli82"))))
+    (properties '((upstream-name . "DNAcopy")))
+    (build-system r-build-system)
+    ;; gfortran is the only declared input, listed as a native input.
+    (native-inputs
+     `(("gfortran" ,gfortran)))
+    (home-page "https://bioconductor.org/packages/DNAcopy")
+    (synopsis "DNA copy number data analysis")
+    (description
+     "This package implements the @dfn{circular binary segmentation} (CBS)
+algorithm to segment DNA copy number data and identify genomic regions with
+abnormal copy number.")
+    (license license:gpl2+)))
+
+;; This is a CRAN package, but it uncharacteristically depends on a
+;; Bioconductor package.
+(define-public r-htscluster
+  (package
+    (name "r-htscluster")
+    (version "2.0.8")
+    (source
+     (origin
+       (method url-fetch)
+       ;; Fetched from CRAN (cran-uri), unlike the bioconductor-uri packages
+       ;; elsewhere in this module.
+       (uri (cran-uri "HTSCluster" version))
+       (sha256
+        (base32
+         "0wnbfh6hdx8692jilgmv8sys1zm6fqc6mim7vvjhyqlmpm8gm0kg"))))
+    (properties `((upstream-name . "HTSCluster")))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-capushe" ,r-capushe)
+       ("r-edger" ,r-edger)
+       ("r-plotrix" ,r-plotrix)))
+    (home-page "https://cran.r-project.org/web/packages/HTSCluster")
+    (synopsis "Clustering high-throughput transcriptome sequencing (HTS) data")
+    ;; The opening sentence was reworded: the earlier text ("provides a
+    ;; Poisson mixture model is implemented to cluster") was ungrammatical.
+    (description
+     "This package implements a Poisson mixture model to cluster
+genes from high-throughput transcriptome sequencing (RNA-seq) data.  Parameter
+estimation is performed using either the EM or CEM algorithm, and the slope
+heuristics are used for model selection (i.e., to choose the number of
+clusters).")
+    (license license:gpl3+)))