gnu: Add r-assessorf.

[jackhill/guix/guix.git] / gnu / packages / bioconductor.scm
diff --git a/gnu/packages/bioconductor.scm b/gnu/packages/bioconductor.scm

index 784ddbf..67a62fa 100644 (file)
--- a/gnu/packages/bioconductor.scm
+++ b/gnu/packages/bioconductor.scm
@@ -2317,6 +2317,218 @@ reproducible gene expression signatures capable of accurately distinguishing
  tumor samples from healthy controls.")
      (license license:artistic2.0)))
  
+(define-public r-assessorf
+  (package
+    (name "r-assessorf")
+    (version "1.14.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "AssessORF" version))
+              (sha256
+               (base32
+                "1l87bpny9k3jbzbzmb9h2ijvblrj471gqv26fyzbvb3vr6y406z7"))))
+    (properties `((upstream-name . "AssessORF")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-biostrings
+           r-decipher
+           r-genomicranges
+           r-iranges))
+    (native-inputs (list r-knitr))
+    (home-page "https://bioconductor.org/packages/AssessORF")
+    (synopsis "Assess gene predictions using proteomics and evolutionary conservation")
+    (description
+     "In order to assess the quality of a set of predicted genes for a genome,
+evidence must first be mapped to that genome.  Next, each gene must be
+categorized based on how strong the evidence is for or against that gene.  The
+AssessORF package provides the functions and class structures necessary for
+accomplishing those tasks, using proteomics hits and evolutionarily conserved
+start codons as the forms of evidence.")
+    (license license:gpl3)))
+
+(define-public r-asset
+  (package
+    (name "r-asset")
+    (version "2.14.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "ASSET" version))
+              (sha256
+               (base32
+                "029acl5k9d4hnvy3jia9cr4rk6w31zn8b5s79i6lazq1cp236hbg"))))
+    (properties `((upstream-name . "ASSET")))
+    (build-system r-build-system)
+    (propagated-inputs (list r-mass r-msm r-rmeta))
+    (native-inputs (list r-knitr))
+    (home-page "https://bioconductor.org/packages/ASSET")
+    (synopsis
+     "Subset-based association analysis of heterogeneous traits and subtypes")
+    (description
+     "This package is an R program for the subset-based analysis of
+heterogeneous traits and disease subtypes.  ASSET allows the user to search
+through all possible subsets of z-scores to identify the subset of traits
+giving the best meta-analyzed z-score.  Further, it returns a p-value
+adjusting for the multiple-testing involved in the search.  It also allows for
+searching for the best combination of disease subtypes associated with each
+variant.")
+    (license license:gpl2)))
+
+(define-public r-atena
+  (package
+    (name "r-atena")
+    (version "1.2.2")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "atena" version))
+              (sha256
+               (base32
+                "0b89wb7cc44c8jd6868dn8pwgid768bprkncsi87qkdz0abbhzhp"))))
+    (properties `((upstream-name . "atena")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-annotationhub
+           r-biocgenerics
+           r-biocparallel
+           r-genomeinfodb
+           r-genomicalignments
+           r-genomicranges
+           r-iranges
+           r-matrix
+           r-rsamtools
+           r-s4vectors
+           r-scales
+           r-sparsematrixstats
+           r-squarem
+           r-summarizedexperiment))
+    (native-inputs (list r-knitr))
+    (home-page "https://github.com/functionalgenomics/atena")
+    (synopsis "Analysis of transposable elements")
+    (description
+     "The atena package quantifies expression of @dfn{TEs} (transposable
+elements) from RNA-seq data through different methods, including ERVmap,
+TEtranscripts and Telescope.  A common interface is provided to use each of
+these methods, which consists of building a parameter object, calling the
+quantification function with this object and getting a
+@code{SummarizedExperiment} object as an output container of the quantified
+expression profiles.  The implementation allows quantifying TEs and gene
+transcripts in an integrated manner.")
+    (license license:artistic2.0)))
+
+(define-public r-atsnp
+  (package
+    (name "r-atsnp")
+    (version "1.12.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "atSNP" version))
+              (sha256
+               (base32
+                "0dmv34xqwr3l2rznapxmyrkyf1w78qzxdv88s5nn8s1m8qdkgwkz"))))
+    (properties `((upstream-name . "atSNP")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-biocfilecache
+           r-biocparallel
+           r-bsgenome
+           r-data-table
+           r-ggplot2
+           r-lifecycle
+           r-motifstack
+           r-rappdirs
+           r-rcpp
+           r-testthat))
+    (native-inputs (list r-knitr))
+    (home-page "https://github.com/sunyoungshin/atSNP")
+    (synopsis
+     "Affinity test for identifying regulatory single nucleotide polymorphisms")
+    (description
+     "The atSNP package performs affinity tests of motif matches with the
+@dfn{SNP} (single nucleotide polymorphism) or the reference genomes and
+SNP-led changes in motif matches.")
+    (license license:gpl2)))
+
+(define-public r-attract
+  (package
+    (name "r-attract")
+    (version "1.48.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "attract" version))
+              (sha256
+               (base32
+                "0f1fsv278kpnxvqg9qa5rw2k3zr8zws0ab73ldl60h6pv9cy8x82"))))
+    (properties `((upstream-name . "attract")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-annotationdbi
+           r-biobase
+           r-cluster
+           r-gostats
+           r-keggrest
+           r-limma
+           r-org-hs-eg-db
+           r-reactome-db))
+    (home-page "https://bioconductor.org/packages/attract")
+    (synopsis "Finding drivers of Kauffman's attractor landscape")
+    (description
+     "This package contains the functions to find the gene expression modules
+that represent the drivers of Kauffman's attractor landscape.  The modules are
+the core attractor pathways that discriminate between different cell types of
+groups of interest.  Each pathway has a set of synexpression groups, which show
+transcriptionally-coordinated changes in gene expression.")
+    (license license:lgpl2.0+)))
+
+(define-public r-awfisher
+  (package
+    (name "r-awfisher")
+    (version "1.10.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "AWFisher" version))
+              (sha256
+               (base32
+                "050k7w0azsl7rqx2pxgccihzc2q8pmh6fyy4gib2d42sdyijr2n1"))))
+    (properties `((upstream-name . "AWFisher")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-edger
+           r-limma))
+    (native-inputs (list r-knitr))
+    (home-page "https://bioconductor.org/packages/AWFisher")
+    (synopsis "Fast computing for adaptively weighted Fisher's method")
+    (description
+     "This package is an implementation of the Adaptively Weighted Fisher's
+method, including fast p-value computing, variability index, and
+meta-pattern.")
+    (license license:gpl3)))
+
+(define-public r-awst
+  (package
+    (name "r-awst")
+    (version "1.4.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "awst" version))
+              (sha256
+               (base32
+                "0iw3zycmj95rmdx7f2w0j4yxkzd90y87lrzgdn9cyvvzi5avflav"))))
+    (properties `((upstream-name . "awst")))
+    (build-system r-build-system)
+    (propagated-inputs (list r-summarizedexperiment))
+    (native-inputs (list r-knitr))
+    (home-page "https://github.com/drisso/awst")
+    (synopsis "Asymmetric within-sample transformation")
+    (description
+     "This package provides @dfn{awst} (Asymmetric Within-Sample Transformation)
+that regularizes RNA-seq read counts and reduces the effect of noise on the
+classification of samples.  AWST comprises two main steps: standardization and
+smoothing.  These steps transform gene expression data to reduce the noise of
+the lowly expressed features, which suffer from background effects and low
+signal-to-noise ratio, and the influence of the highly expressed features,
+which may be the result of amplification bias and other experimental
+artifacts.")
+    (license license:expat)))
+
  (define-public r-baalchip
    (package
      (name "r-baalchip")
@@ -2380,6 +2592,232 @@ manipulating the data from BaseSpace, it also facilitates the access to R's
  rich environment of statistical and data analysis tools.")
      (license license:asl2.0)))
  
+(define-public r-bac
+  (package
+    (name "r-bac")
+    (version "1.56.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "BAC" version))
+              (sha256
+               (base32
+                "0dkw7438d2sf6nb577dnzija54qs0nhlr47lb73li60fhlnvqmh2"))))
+    (properties `((upstream-name . "BAC")))
+    (build-system r-build-system)
+    (home-page "https://bioconductor.org/packages/BAC")
+    (synopsis "Bayesian analysis of ChIP-chip experiments")
+    (description
+     "This package uses a Bayesian hierarchical model to detect enriched
+regions from ChIP-chip experiments.  The common goal in analyzing this
+ChIP-chip data is to detect DNA-protein interactions from ChIP-chip
+experiments.  The BAC package has mainly been tested with Affymetrix tiling
+array data.  However, we expect it to work with other platforms (e.g. Agilent,
+Nimblegen, cDNA, etc.).  Note that BAC does not deal with normalization, so
+you will have to normalize your data beforehand.")
+    (license license:artistic2.0)))
+
+(define-public r-bader
+  (package
+    (name "r-bader")
+    (version "1.34.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "BADER" version))
+              (sha256
+               (base32
+                "0i5x1r2ns1hxhqk5jyfqird81hck1hllvvgx5bn0rb5vl99g8spm"))))
+    (properties `((upstream-name . "BADER")))
+    (build-system r-build-system)
+    (home-page "https://bioconductor.org/packages/BADER")
+    (synopsis
+     "Bayesian analysis of differential expression in RNA sequencing data")
+    (description
+     "The BADER package is intended for the analysis of RNA sequencing data.
+The algorithm fits a Bayesian hierarchical model for RNA sequencing count
+data.  BADER returns the posterior probability of differential expression for
+each gene between two groups A and B.  The joint posterior distribution of the
+variables in the model can be returned in the form of posterior samples, which
+can be used for further down-stream analyses such as gene set enrichment.")
+    (license license:gpl2)))
+
+(define-public r-badregionfinder
+  (package
+    (name "r-badregionfinder")
+    (version "1.24.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "BadRegionFinder" version))
+              (sha256
+               (base32
+                "1a1pqmh5ak9s3k1lxw6flanchk24zyznwm34ixi2b78wdc3hqgm9"))))
+    (properties `((upstream-name . "BadRegionFinder")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-biomart
+           r-genomicranges
+           r-rsamtools
+           r-s4vectors
+           r-variantannotation))
+    (home-page "https://bioconductor.org/packages/BadRegionFinder")
+    (synopsis "Identifying regions with bad coverage in sequence alignment data")
+    (description
+     "BadRegionFinder is a package for identifying regions with bad,
+acceptable and good coverage in sequence alignment data available as bam
+files.  The whole genome may be considered as well as a set of target regions.
+Various visual and textual types of output are available.")
+    (license license:lgpl3)))
+
+(define-public r-bambu
+  (package
+    (name "r-bambu")
+    (version "2.2.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "bambu" version))
+              (sha256
+               (base32
+                "0dc2hpnykr575jbrq9whmdabknl70s2hcs6gkmkl4kpv7xfqdq6w"))))
+    (properties `((upstream-name . "bambu")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-biocgenerics
+           r-biocparallel
+           r-bsgenome
+           r-data-table
+           r-dplyr
+           r-genomeinfodb
+           r-genomicalignments
+           r-genomicfeatures
+           r-genomicranges
+           r-iranges
+           r-rcpp
+           r-rcpparmadillo
+           r-rsamtools
+           r-s4vectors
+           r-summarizedexperiment
+           r-tidyr
+           r-xgboost))
+    (native-inputs (list r-knitr))
+    (home-page "https://github.com/GoekeLab/bambu")
+    (synopsis
+     "Isoform reconstruction and quantification for long read RNA-Seq data")
+    (description
+     "This R package is for multi-sample transcript discovery and
+quantification using long read RNA-Seq data.  You can use bambu after read
+alignment to obtain expression estimates for known and novel transcripts and
+genes.  The output from bambu can directly be used for visualisation and
+downstream analysis, such as differential gene expression or transcript
+usage.")
+    (license license:gpl3)))
+
+(define-public r-bandits
+  (package
+    (name "r-bandits")
+    (version "1.12.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "BANDITS" version))
+              (sha256
+               (base32
+                "1423djb7cij68y0q2dcp8q7lrcn2fxjn6d25v4qy3w00b2w8ppg9"))))
+    (properties `((upstream-name . "BANDITS")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-biocparallel
+           r-data-table
+           r-doparallel
+           r-dorng
+           r-drimseq
+           r-foreach
+           r-ggplot2
+           r-mass
+           r-r-utils
+           r-rcpp
+           r-rcpparmadillo))
+    (native-inputs (list r-knitr))
+    (home-page "https://github.com/SimoneTiberi/BANDITS")
+    (synopsis "Bayesian analysis of differential splicing")
+    (description
+     "BANDITS is a Bayesian hierarchical model for detecting differential
+splicing of genes and transcripts, via @dfn{DTU} (differential transcript
+usage), between two or more conditions.  The method uses a Bayesian
+hierarchical framework, which allows for sample specific proportions in a
+Dirichlet-Multinomial model, and samples the allocation of fragments to the
+transcripts.  Parameters are inferred via @dfn{MCMC} (Markov chain Monte
+Carlo) techniques and a DTU test is performed via a multivariate Wald test on
+the posterior densities for the average relative abundance of transcripts.")
+    (license license:gpl3+)))
+
+(define-public r-banocc
+  (package
+    (name "r-banocc")
+    (version "1.20.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "banocc" version))
+              (sha256
+               (base32
+                "10vaggq1w5jkxd8r2k1mhymzvb7x3h8afwn2pvmcpj022ka7xhbx"))))
+    (properties `((upstream-name . "banocc")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-coda
+           r-mvtnorm
+           r-rstan
+           r-stringr))
+    (native-inputs (list r-knitr))
+    (home-page "https://bioconductor.org/packages/banocc")
+    (synopsis "Bayesian analysis of compositional covariance")
+    (description
+     "BAnOCC is a package designed for compositional data, where each sample
+sums to one.  It infers the approximate covariance of the unconstrained data
+using a Bayesian model coded with @code{rstan}.  It provides as output the
+@code{stanfit} object as well as posterior median and credible interval
+estimates for each correlation element.")
+    (license license:expat)))
+
+(define-public r-barcodetrackr
+  (package
+    (name "r-barcodetrackr")
+    (version "1.4.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "barcodetrackR" version))
+              (sha256
+               (base32
+                "0yxa15xkgqazw31vq4wm8v747bw4qb18m6i602pvynk0n5bgg3d3"))))
+    (properties `((upstream-name . "barcodetrackR")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-circlize
+           r-cowplot
+           r-dplyr
+           r-ggdendro
+           r-ggplot2
+           r-ggridges
+           r-magrittr
+           r-plyr
+           r-proxy
+           r-rcolorbrewer
+           r-rlang
+           r-s4vectors
+           r-scales
+           r-shiny
+           r-summarizedexperiment
+           r-tibble
+           r-tidyr
+           r-vegan
+           r-viridis))
+    (native-inputs (list r-knitr))
+    (home-page "https://github.com/dunbarlabNIH/barcodetrackR")
+    (synopsis "Functions for analyzing cellular barcoding data")
+    (description
+     "This package is developed for the analysis and visualization of clonal
+tracking data.  The required data is formed by samples and tag abundances in
+matrix form, usually from cellular barcoding experiments, integration site
+retrieval analyses, or similar technologies.")
+    (license license:cc0)))
+
  (define-public r-biocversion
    (package
      (name "r-biocversion")