gnu: Add centrifuge.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
index 3918a4f..c5b47ee 100644 (file)
@@ -1656,7 +1656,27 @@ package provides command line tools using the Bio++ library.")
                   ;; Remove useless msbuild directory
                   (delete-file-recursively
                    "c++/src/build-system/project_tree_builder/msbuild")
-                  #t))))
+
+                  ;; Build reproducibly.
+                  ;; Do not record the kernel version
+                  (substitute* "c++/src/build-system/configure"
+                    (("kver=.*") "kver=\"\""))
+                  ;; Do not generate random numbers.
+                  (substitute* "c++/scripts/common/impl/define_random_macros.sh"
+                    (("#define NCBI_RANDOM_VALUE_MAX  0xffffffffu" m)
+                     (string-append m "
+#define NCBI_RANDOM_VALUE_0    2845495105u
+#define NCBI_RANDOM_VALUE_1    2158634051u
+#define NCBI_RANDOM_VALUE_2    4072202242u
+#define NCBI_RANDOM_VALUE_3    902228395u
+#define NCBI_RANDOM_VALUE_4    1353323915u
+#define NCBI_RANDOM_VALUE_5    574823513u
+#define NCBI_RANDOM_VALUE_6    4119501261u
+#define NCBI_RANDOM_VALUE_7    2477640938u
+#define NCBI_RANDOM_VALUE_8    2776595395u
+#define NCBI_RANDOM_VALUE_9    270550684u
+"))
+                    (("cksum") "cksum >/dev/null"))))))
     (build-system gnu-build-system)
     (arguments
      `(;; There are two(!) tests for this massive library, and both fail with
@@ -3792,7 +3812,7 @@ comment or quality sections.")
     (inputs
      (list gsl openblas zlib))
     (native-inputs
-     `(("catch" ,catch-framework2-1)
+     `(("catch" ,catch2-1)
        ("perl" ,perl)
        ("shunit2" ,shunit2)
        ("which" ,which)))
@@ -7273,6 +7293,228 @@ sequence.")
     (supported-systems '("i686-linux" "x86_64-linux"))
     (license license:bsd-3)))
 
+(define-public r-gutils
+  (let ((commit "10e36c7b580aacb2d952140a3fdd82418aaddea6")
+        (revision "1"))
+    (package
+      (name "r-gutils")
+      (version (git-version "0.2.0" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/mskilab/gUtils")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "1wq9kd1afzy7ii510r20c4n9fkykj6p15q5c85ws27h1q5w4ghxy"))))
+      (properties `((upstream-name . "gUtils")))
+      (build-system r-build-system)
+      (propagated-inputs
+       (list r-biocgenerics
+             r-data-table
+             r-genomeinfodb
+             r-genomicranges
+             r-iranges
+             r-matrix
+             r-s4vectors
+             r-stringr))
+      (home-page "https://github.com/mskilab/gUtils")
+      (synopsis "Additional capabilities and speed for GenomicRanges operations")
+      (description
+       "This is an R package providing additional capabilities and speed for
+@code{GenomicRanges} operations.")
+      (license license:gpl2))))
+
+(define-public r-bamutils
+  (let ((commit "639dba901f16944fa1b7a8d7048701ba86a2cdb8")
+        (revision "1"))
+    (package
+      (name "r-bamutils")
+      (version (git-version "0.0.0.9000" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/mskilab/bamutils/")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "0qwby2v5rydnipvf1iv1wz9nf02yq98k0xbc4inf9mqc54jwacs0"))))
+      (properties `((upstream-name . "bamUtils")))
+      (build-system r-build-system)
+      (propagated-inputs
+       (list r-abind
+             r-biocgenerics
+             r-data-table
+             r-genomicalignments
+             r-genomicranges
+             r-gutils
+             r-rsamtools
+             r-variantannotation))
+      (home-page "https://github.com/mskilab/bamutils/")
+      (synopsis "Utility functions for manipulating BAMs")
+      (description "This package provides utility functions for manipulating
+BAM files.")
+      (license license:gpl2))))
+
+(define-public r-gtrack
+  (let ((commit "a694fa36cedafca2658da79fc8e5b673535b15e5")
+        (revision "1"))
+    (package
+      (name "r-gtrack")
+      (version (git-version "0.1.0" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/mskilab/gTrack/")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "070qlrbqsbj9max2vx740zigqh0ymvnw2pm1ia5la3wb4dbfwh2b"))))
+      (properties `((upstream-name . "gTrack")))
+      (build-system r-build-system)
+      (propagated-inputs
+       (list r-biocgenerics
+             r-data-table
+             r-genomeinfodb
+             r-genomicranges
+             r-gutils
+             r-iranges
+             r-matrix
+             r-rcolorbrewer
+             r-rcpp
+             r-rcurl
+             r-rtracklayer
+             r-s4vectors))
+      (home-page "https://github.com/mskilab/gTrack/")
+      (synopsis "Plot tracks of complex genomic data across multiple genomic windows")
+      (description
+       "This package provides an object for plotting GRanges, RleList, UCSC
+file formats, and ffTrack objects in multi-track panels.")
+      (license license:gpl2))))
+
+(define-public r-gchain
+  (let ((commit "dc393e8dd0d8efaf36270c04d7112db8553db36a")
+        (revision "1"))
+    (package
+      (name "r-gchain")
+      (version (git-version "0.2.0" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/mskilab/gChain/")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "105wgi5w2fhwq1grsvj6zjigwg0sny3z7zr577q8ki3qffjwdkj0"))))
+      (properties `((upstream-name . "gChain")))
+      (build-system r-build-system)
+      (propagated-inputs
+       (list r-bamutils
+             r-biostrings
+             r-data-table
+             r-genomicalignments
+             r-genomicranges
+             r-gtrack
+             r-gutils
+             r-matrix
+             r-rtracklayer))
+      (home-page "https://github.com/mskilab/gChain/")
+      (synopsis "Additional capabilities and speed for GenomicRanges operations")
+      (description
+       "This R package provides additional capabilities and speed for
+GenomicRanges operations.")
+      (license license:gpl2))))
+
+(define-public r-skitools
+  (let ((commit "22d107d32f063eb891eb5e7fb36996d1c0b0d2bc")
+        (revision "1"))
+    (package
+      (name "r-skitools")
+      (version (git-version "0.0.0.9000" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/mskilab/skitools/")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "1977d9bkdk9l2n6niahfj9vksh9l1ga4g7c3b3x27lj1gc0qgr4z"))))
+      (properties `((upstream-name . "skitools")))
+      (build-system r-build-system)
+      (propagated-inputs
+       (list r-biostrings
+             r-complexheatmap
+             r-data-table
+             r-devtools
+             r-dt
+             r-gchain
+             r-genomeinfodb
+             r-genomicranges
+             r-ggplot2
+             r-gplots
+             r-gutils
+             r-htmlwidgets
+             r-hwriter
+             r-igraph
+             r-iranges
+             r-plotly
+             r-rcolorbrewer
+             r-reshape2
+             r-s4vectors
+             r-stringr
+             r-variantannotation))
+      (home-page "https://github.com/mskilab/skitools/")
+      (synopsis "Various mskilab R utilties")
+      (description
+       "This package provides R miscellaneous utilities for basic data
+manipulation, debugging, visualization, lsf management, and common mskilab
+tasks.")
+      (license license:expat))))
+
+(define-public r-chromunity
+  (let ((commit "09fce8bc12cb84b45a6ea25bf8db6e5b75113d4f")
+        (revision "1"))
+    (package
+      (name "r-chromunity")
+      (version (git-version "0.0.1" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/mskilab/chromunity")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "0lp0h614k8fq6h9gpbylk4chh7q6w4qda8lx03ajrpppxmg7al2d"))))
+      (properties `((upstream-name . "chromunity")))
+      (build-system r-build-system)
+      (propagated-inputs
+       (list r-arrow
+             r-biocgenerics
+             r-data-table
+             r-gchain
+             r-genomicranges
+             r-gutils
+             r-igraph
+             r-magrittr
+             r-mass
+             r-matrix
+             r-pbmcapply
+             r-plyr
+             r-r6
+             r-skitools
+             r-zoo))
+      (home-page "https://github.com/mskilab/chromunity")
+      (synopsis "Discovery of communities in Pore-C concatemers")
+      (description "This is a package for the discovery of communities in
+Pore-C concatemers.")
+      (license license:gpl3))))
+
 (define-public r-presto
   (let ((commit "052085db9c88aa70a28d11cc58ebc807999bf0ad")
         (revision "0"))
@@ -8222,6 +8464,46 @@ BLAST, KEGG, GenBank, MEDLINE and GO.")
     ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
     (license (list license:ruby license:lgpl2.1+ license:gpl2+ ))))
 
+(define-public centrifuge
+  (package
+    (name "centrifuge")
+    (version "1.0.4")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/DaehwanKimLab/centrifuge.git")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "167610gbz1rrh6ir3j7jcmhzg3x5msn7x7a3dpv7wmwdndnnqvg0"))))
+    (build-system gnu-build-system)
+    (arguments
+     (list
+      #:tests? #false                   ; no check target
+      #:make-flags
+      #~(list (string-append "prefix=" #$output))
+      #:phases
+      '(modify-phases %standard-phases
+         (delete 'configure))))
+    (inputs (list python-wrapper))
+    (native-inputs
+     (list pandoc perl                  ;for documentation
+           which))
+    (home-page "https://github.com/DaehwanKimLab/centrifuge/")
+    (synopsis "Classifier for metagenomic sequences")
+    (description "Centrifuge is a microbial classification engine that enables
+rapid, accurate and sensitive labeling of reads and quantification of species
+on desktop computers.  The system uses an indexing scheme based on the
+@dfn{Burrows-Wheeler transform} (BWT) and the @dfn{Ferragina-Manzini} (FM)
+index, optimized specifically for the metagenomic classification problem.
+Centrifuge requires a relatively small index (4.7 GB for all complete
+bacterial and viral genomes plus the human genome) and classifies sequences at
+very high speed, allowing it to process the millions of reads from a typical
+high-throughput DNA sequencing run within a few minutes.")
+    (license license:gpl3+)))
+
 (define-public bio-vcf
   (package
     (name "bio-vcf")
@@ -10936,7 +11218,7 @@ in an easily configurable manner.")
 (define-public pigx-bsseq
   (package
     (name "pigx-bsseq")
-    (version "0.1.7")
+    (version "0.1.8")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
@@ -10944,7 +11226,7 @@ in an easily configurable manner.")
                                   "/pigx_bsseq-" version ".tar.gz"))
               (sha256
                (base32
-                "1hfiignq3410dbl6f67vc6zr69abknpcgxixx475dspky2jb5lyn"))))
+                "1s8zgrqxabrawrgkga5rmgb0gyzj7ck47p3rkicjkfv7r2yjy0d7"))))
     (build-system gnu-build-system)
     (arguments
      `(;; TODO: tests currently require 12+GB of RAM.  See
@@ -11391,13 +11673,13 @@ cases include:
 (define-public python-mappy
   (package
    (name "python-mappy")
-   (version "2.18")
+   (version "2.24")
    (source (origin
             (method url-fetch)
             (uri (pypi-uri "mappy" version))
             (sha256
              (base32
-              "1a05p7rkmxa6qhm108na8flzj2v45jab06drk59kzk1ip2sgvzqq"))))
+              "1ycszza87p9qvx8mis9v1hry0ac465x1xcxbsn1k45qlxxrzp8im"))))
    (build-system python-build-system)
    (native-inputs
     (list python-cython))
@@ -11592,47 +11874,43 @@ including:
     (license license:gpl3+)))
 
 (define-public r-dyngen
-  (let ((commit "37fd1798fcbd41093fb3d7775bb2d268e2fc82b6")
-        (revision "1"))
-    (package
-      (name "r-dyngen")
-      (version (git-version "1.0.3" revision commit))
-      (source
-       (origin
-         (method git-fetch)
-         (uri (git-reference
-               (url "https://github.com/dynverse/dyngen")
-               (commit commit)))
-         (file-name (git-file-name name version))
-         (sha256
-          (base32 "05pr6v1b8yji1jnj3fwx0crmg8ay6yy6lp9qjmcyvhkwbmf3kvc7"))))
-      (properties `((upstream-name . "dyngen")))
-      (build-system r-build-system)
-      (propagated-inputs
-       (list r-assertthat
-             r-dplyr
-             r-dynutils
-             r-ggplot2
-             r-ggraph
-             r-ggrepel
-             r-gillespiessa2
-             r-igraph
-             r-lmds
-             r-matrix
-             r-patchwork
-             r-pbapply
-             r-purrr
-             r-rlang
-             r-tibble
-             r-tidygraph
-             r-tidyr
-             r-viridis))
-      (home-page "https://github.com/dynverse/dyngen")
-      (synopsis "Multi-Modal simulator for single-cell omics analyses")
-      (description
-       "This package provides a multi-modal simulation engine for studying
+  (package
+    (name "r-dyngen")
+    (version "1.0.4")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cran-uri "dyngen" version))
+       (sha256
+        (base32
+         "1qmqy0dyiz30zpf3ii4h2ip6hg2449ghb474sjzrqa1yk9mdpy4i"))))
+    (properties `((upstream-name . "dyngen")))
+    (build-system r-build-system)
+    (propagated-inputs
+     (list r-assertthat
+           r-dplyr
+           r-dynutils
+           r-ggplot2
+           r-ggraph
+           r-ggrepel
+           r-gillespiessa2
+           r-igraph
+           r-lmds
+           r-matrix
+           r-patchwork
+           r-pbapply
+           r-purrr
+           r-rlang
+           r-tibble
+           r-tidygraph
+           r-tidyr
+           r-viridis))
+    (home-page "https://github.com/dynverse/dyngen")
+    (synopsis "Multi-Modal simulator for single-cell omics analyses")
+    (description
+     "This package provides a multi-modal simulation engine for studying
 dynamic cellular processes at single-cell resolution.")
-      (license license:expat))))
+    (license license:expat)))
 
 ;; Needed for r-liana
 (define-public r-omnipathr/devel
@@ -12518,6 +12796,12 @@ fasta subsequences.")
              (substitute* '("requirements.txt"
                             "cooler.egg-info/requires.txt")
                (("cytoolz.*<.*0.11") "cytoolz"))))
+         ;; This version of flake8 just won't work with this version of
+         ;; pytest, because of dependency pinning.
+         (add-after 'unpack 'do-not-use-flake8
+           (lambda _
+             (substitute* "setup.cfg"
+               (("addopts = --flake8") "addopts = "))))
          (add-after 'unpack 'patch-tests
            (lambda _
              (substitute* "tests/test_create.py"
@@ -12526,10 +12810,13 @@ fasta subsequences.")
                                 "access to genome.ucsc.edu\")\n"
                                 "def test_roundtrip")))
              (substitute* "tests/test_util.py"
-              (("def test_fetch_chromsizes")
-                 (string-append "@pytest.mark.skip(reason=\"requires network "
-                                "access to genome.ucsc.edu\")\n"
-                                "def test_fetch_chromsizes")))
+               (("def test_fetch_chromsizes")
+                (string-append "@pytest.mark.skip(reason=\"requires network "
+                               "access to genome.ucsc.edu\")\n"
+                               "def test_fetch_chromsizes"))
+               ;; See https://github.com/open2c/cooler/issues/287
+               (("skipif\\(six.PY2, reason=\"Scipy on Py2 is too old\"")
+                "skip(reason=\"Scipy is too new\""))
              ;; This test depends on ipytree, which contains a lot of minified
              ;; JavaScript.
              (substitute* "tests/test_fileops.py"
@@ -14988,7 +15275,7 @@ international community.")
 (define-public kraken2
   (package
     (name "kraken2")
-    (version "2.1.1")
+    (version "2.1.2")
     (source (origin
               (method git-fetch)
               (uri (git-reference
@@ -14997,7 +15284,7 @@ international community.")
               (file-name (git-file-name name version))
               (sha256
                (base32
-                "0h7a7vygd7y5isbrnc6srwq6xj1rmyd33pm8mmcgfkmlxlg5vkg3"))))
+                "1pl6ml1ldg2hnhy8ps56q0fl1wq3g91qkhinj6pb4yjjhv1rxsjf"))))
     (build-system gnu-build-system)
     (arguments
      `(#:tests? #false                  ; there are none
@@ -15522,6 +15809,8 @@ sequence motif analysis.")
         (base32
          "1023hadgcsgi53kz53ql45207hfizf9sw57z0qij3ay1bx68zbpm"))))
     (build-system python-build-system)
+    (arguments
+     '(#:tests? #false))                ;no tests
     (native-inputs
      (list python-cython python-nose2))
     ;; The package mainly consists of a command-line tool, but also has a
@@ -15543,47 +15832,49 @@ for the analysis and visualization of raw nanopore signal.")
     (license license:mpl2.0)))
 
 (define-public python-pyvcf
-  (package
-    (name "python-pyvcf")
-    (version "0.6.8")
-    ;; Use git, because the PyPI tarballs lack test data.
-    (source
-      (origin
-        (method git-fetch)
-        (uri (git-reference
+  (let ((commit "476169cd457ba0caa6b998b301a4d91e975251d9")
+        (revision "0"))
+    (package
+      (name "python-pyvcf")
+      (version (git-version "0.6.8" revision commit))
+      ;; Use git, because the PyPI tarballs lack test data.
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
                (url "https://github.com/jamescasbon/PyVCF.git")
                ;; Latest release is not tagged.
-               (commit "bfcedb9bad1a14074ac4526ffdb610611e073810")))
-        (file-name (git-file-name name version))
-        (sha256
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
           (base32
-            "0c7lsssns3zp8fh2ibllzzra003srg9vbxqzmq6654akbzdb7lrf"))))
-    (build-system python-build-system)
-    (arguments
-     `(#:phases
-        (modify-phases %standard-phases
-          (add-after 'unpack 'patch-sample-script
-            (lambda _
-              ;; Add Python 3 compatibility to this sample script.
-              (substitute* "scripts/vcf_sample_filter.py"
-                (("print (.*)\n" _ arg)
-                 (string-append "print(" arg ")\n")))))
-          (add-after 'install 'remove-installed-tests
-            ;; Do not install test files.
-            (lambda* (#:key inputs outputs #:allow-other-keys)
-              (delete-file-recursively (string-append
+           "0qf9lwj7r2hjjp4bd4vc7nayrhblfm4qcqs4dbd43a6p4bj2jv5p"))))
+      (build-system python-build-system)
+      (arguments
+       `(#:phases
+         (modify-phases %standard-phases
+           (add-after 'unpack 'patch-sample-script
+             (lambda _
+               ;; Add Python 3 compatibility to this sample script.
+               (substitute* "scripts/vcf_sample_filter.py"
+                 (("print (.*)\n" _ arg)
+                  (string-append "print(" arg ")\n")))))
+           (add-after 'install 'remove-installed-tests
+             ;; Do not install test files.
+             (lambda* (#:key inputs outputs #:allow-other-keys)
+               (delete-file-recursively (string-append
                                          (site-packages inputs outputs)
                                          "/vcf/test")))))))
-    (native-inputs
-     ;; Older setuptools is needed for use_2to3.
-     (list python-cython python-setuptools))
-    (propagated-inputs
-     (list python-pysam python-rpy2))
-    (home-page "https://github.com/jamescasbon/PyVCF")
-    (synopsis "Variant Call Format parser for Python")
-    (description "This package provides a @acronym{VCF,Variant Call Format}
+      (native-inputs
+       ;; Older setuptools is needed for use_2to3.
+       (list python-cython python-setuptools-for-tensorflow))
+      (propagated-inputs
+       (list python-pysam python-rpy2))
+      (home-page "https://github.com/jamescasbon/PyVCF")
+      (synopsis "Variant Call Format parser for Python")
+      (description "This package provides a @acronym{VCF,Variant Call Format}
 parser for Python.")
-    (license license:expat)))
+      (license license:expat))))
 
 (define-public nanosv
   (package
@@ -15998,6 +16289,43 @@ BigWig files, as well as efficient region coverage summary over intervals from
 both types of files.")
     (license license:expat)))
 
+(define-public mudskipper
+  (package
+    (name "mudskipper")
+    (version "0.1.0")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "mudskipper" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1y7fnlz6irmxdmv6bxzm95w4ws4vzldlrh8npvgxmdnrz9pgb1dv"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:tests? #false    ;fail because the "mudskipper" crate cannot be found
+       #:cargo-inputs
+       (("rust-bio" ,rust-bio-0.39)
+        ("rust-bio-types" ,rust-bio-types-0.12)
+        ("rust-clap" ,rust-clap-2)
+        ("rust-coitrees" ,rust-coitrees-0.2)
+        ("rust-env-logger" ,rust-env-logger-0.9)
+        ("rust-fnv" ,rust-fnv-1)
+        ("rust-indicatif" ,rust-indicatif-0.16)
+        ("rust-libradicl" ,rust-libradicl-0.4)
+        ("rust-linecount" ,rust-linecount-0.1)
+        ("rust-log" ,rust-log-0.4)
+        ("rust-num-cpus" ,rust-num-cpus-1)
+        ("rust-rust-htslib" ,rust-rust-htslib-0.38))))
+    (native-inputs
+     (list cmake pkg-config))
+    (inputs
+     (list zlib xz))
+    (home-page "https://github.com/OceanGenomics/mudskipper")
+    (synopsis "Convert genomic alignments to transcriptomic BAM/RAD files.")
+    (description "Mudskipper is a tool for projecting genomic alignments to
+transcriptomic coordinates.")
+    (license license:bsd-3)))
+
 (define-public r-ascat
   (package
    (name "r-ascat")
@@ -16157,6 +16485,29 @@ integration, exploration, and analysis of high-dimensional single-cell
 cytometry and imaging data.")
       (license license:expat))))
 
+(define-public r-compgenomrdata
+  (let ((commit "24484cb77631e1123ead6c329b9d62c160e600c6")
+        (revision "1"))
+    (package
+      (name "r-compgenomrdata")
+      (version (git-version "0.1.0" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/compgenomr/compGenomRData")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "06gdvz4m4qlb1ylv10qfx09zv4c95cm7nps4y2s67m946kv8czv8"))))
+      (properties `((upstream-name . "compGenomRData")))
+      (build-system r-build-system)
+      (home-page "https://github.com/compgenomr/compGenomRData")
+      (synopsis "Data for Computational Genomics with R book")
+      (description "This package provides data for the book \"Computational
+Genomics with R\".")
+      (license license:gpl3))))
+
 (define-public r-cytonorm
   (let ((commit "e4b9d343ee65db3c422800f1db3e77c25abde987")
         (revision "1"))
@@ -16196,6 +16547,32 @@ control samples and applying quantile normalization on all markers of
 interest.")
       (license license:gpl2+))))
 
+(define-public r-kbet
+  (let ((commit "f35171dfb04c7951b8a09ac778faf7424c4b6bc0")
+        (revision "1"))
+    (package
+      (name "r-kbet")
+      (version (git-version "0.99.6" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/theislab/kBET")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "1r91prl2kki3zk694vhlmxdlqh0ixlhs8jfcqw6wc7cdsa0nv67k"))))
+      (properties `((upstream-name . "kBET")))
+      (build-system r-build-system)
+      (propagated-inputs (list r-cluster r-fnn r-ggplot2 r-mass r-rcolorbrewer))
+      (native-inputs (list r-knitr))
+      (home-page "https://github.com/theislab/kBET")
+      (synopsis "k-nearest neighbour batch effect test")
+      (description
+       "This tool detects batch effects in high-dimensional data based on chi^2-test.")
+      ;; Any version of the GPL
+      (license license:gpl3+))))
+
 (define-public ccwl
   (package
     (name "ccwl")
@@ -16430,6 +16807,55 @@ module capable of computing base-level alignments for very large sequences.")
     (home-page "https://github.com/ekg/wfmash")
     (license license:expat)))
 
+(define-public flair
+  (package
+    (name "flair")
+    (version "1.6.2")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/BrooksLabUCSC/flair")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "106swb2q7l20ki58fca1hg95q5f79bgp9gjb0clr2243ycrzyxf8"))))
+    (build-system python-build-system)
+    (arguments
+     (list
+      #:tests? #false ;there are none
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; TODO: implement as a feature of python-build-system (PEP-621,
+          ;; PEP-631, PEP-660)
+          (replace 'build
+            (lambda _
+              (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version)
+              ;; ZIP does not support timestamps before 1980.
+              (setenv "SOURCE_DATE_EPOCH" "315532800")
+              (invoke "python" "-m" "build" "--wheel" "--no-isolation" ".")))
+          (replace 'install
+            (lambda _
+              (apply invoke "pip" "--no-cache-dir" "--no-input"
+                     "install" "--no-deps" "--prefix" #$output
+                     (find-files "dist" "\\.whl$")))))))
+    (propagated-inputs
+     (list python-mappy
+           python-ncls
+           python-pybedtools
+           python-pysam
+           python-tqdm))
+    (native-inputs
+     (list python-pypa-build python-setuptools))
+    (home-page "https://flair.readthedocs.io/en/latest/")
+    (synopsis "Full-length alternative isoform analysis of RNA")
+    (description "This package implements FLAIR (Full-Length Alternative
+Isoform analysis of RNA) for the correction, isoform definition, and
+alternative splicing analysis of noisy reads.  FLAIR has primarily been used
+for nanopore cDNA, native RNA, and PacBio sequencing reads.")
+    (license license:bsd-3)))
+
 (define-public go-github-com-biogo-graph
   (package
     (name "go-github-com-biogo-graph")
@@ -16553,3 +16979,104 @@ useful for bioinformatic analysis.")
      (list go-gopkg-in-check-v1))
     (synopsis "HTS SAM module for biogo")
     (description "This package provides tools for handling SAM files.")))
+
+(define-public go-github-com-biogo-hts-tabix
+  (package
+    (inherit go-github-com-biogo-hts-bam)
+    (name "go-github-com-biogo-hts-tabix")
+    (arguments
+     '(#:import-path "github.com/biogo/hts/tabix"
+       #:unpack-path "github.com/biogo/hts"))
+    (propagated-inputs
+     (list go-gopkg-in-check-v1))
+    (synopsis "HTS Tabix module for biogo")
+    (description "This package provides tools for handling Tabix files.")))
+
+(define-public go-github-com-biogo-hts-bgzf
+  (package
+    (inherit go-github-com-biogo-hts-bam)
+    (name "go-github-com-biogo-hts-bgzf")
+    (arguments
+     '(#:import-path "github.com/biogo/hts/bgzf"
+       #:unpack-path "github.com/biogo/hts"))
+    (propagated-inputs
+     (list go-gopkg-in-check-v1))
+    (synopsis "HTS bgzf module for biogo")
+    (description "This package provides tools for handling bgzf files.")))
+
+(define-public go-github-com-biogo-hts-cram
+  (package
+    (inherit go-github-com-biogo-hts-bam)
+    (name "go-github-com-biogo-hts-cram")
+    (arguments
+     '(#:import-path "github.com/biogo/hts/cram"
+       #:unpack-path "github.com/biogo/hts"
+       #:tests? #false)) ;require network access
+    (propagated-inputs
+     (list go-gopkg-in-check-v1
+           go-github.com-ulikunitz-xz
+           go-github-com-kortschak-utter))
+    (synopsis "HTS CRAM module for biogo")
+    (description "This package provides tools for handling CRAM files.")))
+
+(define-public go-github-com-biogo-hts-csi
+  (package
+    (inherit go-github-com-biogo-hts-bam)
+    (name "go-github-com-biogo-hts-csi")
+    (arguments
+     '(#:import-path "github.com/biogo/hts/csi"
+       #:unpack-path "github.com/biogo/hts"))
+    (propagated-inputs
+     (list go-gopkg-in-check-v1))
+    (synopsis "Coordinate sorted indexing for biogo")
+    (description "This package implements CSIv1 and CSIv2 coordinate sorted
+indexing.")))
+
+(define-public go-github-com-biogo-hts-fai
+  (package
+    (inherit go-github-com-biogo-hts-bam)
+    (name "go-github-com-biogo-hts-fai")
+    (arguments
+     '(#:import-path "github.com/biogo/hts/fai"
+       #:unpack-path "github.com/biogo/hts"))
+    (propagated-inputs
+     (list go-gopkg-in-check-v1))
+    (synopsis "Fasta sequence file index handling for biogo")
+    (description "This package implements FAI fasta sequence file index
+handling.")))
+
+(define-public go-github-com-biogo-biogo
+  (package
+    (name "go-github-com-biogo-biogo")
+    (version "1.0.4")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/biogo/biogo")
+                    (commit (string-append "v" version))))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "0ali1mqf3dc26myv6l7wmqfr8i25461rbq3qdad8s0wi29622199"))))
+    (build-system go-build-system)
+    (arguments
+     '(#:import-path "github.com/biogo/biogo"))
+    (propagated-inputs
+     (list go-gopkg-in-check-v1
+           go-github-com-biogo-store-interval
+           go-github-com-biogo-store-kdtree
+           go-github-com-biogo-store-llrb
+           go-github-com-biogo-store-step
+           go-github-com-biogo-hts-bam
+           go-github-com-biogo-graph))
+    (home-page "https://github.com/biogo/biogo")
+    (synopsis "Bioinformatics library for Go")
+    (description
+     "BĂ­ogo is a bioinformatics library for the Go language.")
+    (license license:bsd-3)))
+
+;;;
+;;; Avoid adding new packages to the end of this file. To reduce the chances
+;;; of a merge conflict, place them above by existing packages with similar
+;;; functionality or similar names.
+;;;