gnu: Add circos.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
index b0ec57b..33dda7e 100644 (file)
@@ -15,6 +15,7 @@
 ;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
+;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -41,6 +42,7 @@
   #:use-module (guix build-system ant)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system cmake)
+  #:use-module (guix build-system go)
   #:use-module (guix build-system haskell)
   #:use-module (guix build-system meson)
   #:use-module (guix build-system ocaml)
   #:use-module (gnu packages documentation)
   #:use-module (gnu packages databases)
   #:use-module (gnu packages datastructures)
+  #:use-module (gnu packages dlang)
   #:use-module (gnu packages file)
   #:use-module (gnu packages flex)
   #:use-module (gnu packages gawk)
   #:use-module (gnu packages gcc)
   #:use-module (gnu packages gd)
-  #:use-module (gnu packages gtk)
+  #:use-module (gnu packages golang)
   #:use-module (gnu packages glib)
   #:use-module (gnu packages graph)
   #:use-module (gnu packages groff)
+  #:use-module (gnu packages gtk)
   #:use-module (gnu packages guile)
   #:use-module (gnu packages guile-xyz)
   #:use-module (gnu packages haskell-check)
@@ -86,7 +90,6 @@
   #:use-module (gnu packages java)
   #:use-module (gnu packages java-compression)
   #:use-module (gnu packages jemalloc)
-  #:use-module (gnu packages dlang)
   #:use-module (gnu packages linux)
   #:use-module (gnu packages lisp-xyz)
   #:use-module (gnu packages logging)
@@ -784,13 +787,13 @@ intended to behave exactly the same as the original BWK awk.")
 (define-public python-pybedtools
   (package
     (name "python-pybedtools")
-    (version "0.8.0")
+    (version "0.8.1")
     (source (origin
               (method url-fetch)
               (uri (pypi-uri "pybedtools" version))
               (sha256
                (base32
-                "1xl454ijvd4dzfvqgfahad49b49j7qy710fq9xh1rvk42z6x5ssf"))))
+                "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
     (build-system python-build-system)
     (arguments
      `(#:modules ((ice-9 ftw)
@@ -817,6 +820,10 @@ intended to behave exactly the same as the original BWK awk.")
                ;; (see: https://github.com/daler/pybedtools/issues/192).
                (("def test_getting_example_beds")
                 "def _do_not_test_getting_example_beds"))
+             ;; This issue still occurs on python2
+             (substitute* "pybedtools/test/test_issues.py"
+               (("def test_issue_303")
+                "def _test_issue_303"))
              #t))
          ;; TODO: Remove phase after it's part of PYTHON-BUILD-SYSTEM.
          ;; build system.
@@ -885,7 +892,12 @@ Python.")
     (license license:gpl2+)))
 
 (define-public python2-pybedtools
-  (package-with-python2 python-pybedtools))
+  (let ((pybedtools (package-with-python2 python-pybedtools)))
+    (package
+      (inherit pybedtools)
+      (native-inputs
+       `(("python2-pathlib" ,python2-pathlib)
+         ,@(package-native-inputs pybedtools))))))
 
 (define-public python-biom-format
   (package
@@ -2745,7 +2757,7 @@ quantitative phenotypes.")
 (define-public edirect
   (package
     (name "edirect")
-    (version "12.1.20190829")
+    (version "13.3.20200128")
     (source (origin
               (method url-fetch)
               (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
@@ -2753,7 +2765,15 @@ quantitative phenotypes.")
                                   "/edirect-" version ".tar.gz"))
               (sha256
                (base32
-                "1xb330z28dgp7slrvp8r7rgncsasv9lpcpqim571yg728dq7xdik"))))
+                "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin (delete-file "Mozilla-CA.tar.gz")
+                       (substitute* "rchive.go"
+                         ;; This go library does not have any license.
+                         (("github.com/fiam/gounidecode/unidecode")
+                          "golang.org/rainycape/unidecode"))
+                       #t))))
     (build-system perl-build-system)
     (arguments
      `(#:phases
@@ -2761,19 +2781,48 @@ quantitative phenotypes.")
          (delete 'configure)
          (delete 'build)
          (delete 'check)                ; simple check after install
+         (add-after 'unpack 'patch-programs
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; Ignore errors about missing xtract.Linux and rchive.Linux.
+              (substitute* "pm-refresh"
+                (("cat \\\"\\$target")
+                 "grep ^[[:digit:]] \"$target"))
+              #t))
          (replace 'install
-           (lambda* (#:key outputs #:allow-other-keys)
-             (install-file "edirect.pl"
-                           (string-append (assoc-ref outputs "out") "/bin"))
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
+                   (edirect-go (assoc-ref inputs "edirect-go-programs")))
+               (for-each
+                 (lambda (file)
+                   (install-file file bin))
+                 '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
+                   "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
+                   "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
+                   "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
+                   "pm-index" "pm-invert" "pm-merge" "pm-promote"))
+               (symlink (string-append edirect-go "/bin/xtract.Linux")
+                        (string-append bin "/xtract"))
+               (symlink (string-append edirect-go "/bin/rchive.Linux")
+                        (string-append bin "/rchive")))
              #t))
          (add-after 'install 'wrap-program
            (lambda* (#:key outputs #:allow-other-keys)
-             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
-             (let* ((out (assoc-ref outputs "out"))
+              ;; Make sure everything can run in a pure environment.
+              (let ((out (assoc-ref outputs "out"))
                     (path (getenv "PERL5LIB")))
-               (wrap-program (string-append out "/bin/edirect.pl")
-                 `("PERL5LIB" ":" prefix (,path))))
-             #t))
+                (for-each
+                  (lambda (file)
+                    (wrap-program file
+                      `("PERL5LIB" ":" prefix (,path)))
+                    (wrap-program file
+                      `("PATH" ":" prefix (,(string-append out "/bin")
+                                           ,(dirname (which "sed"))
+                                           ,(dirname (which "gzip"))
+                                           ,(dirname (which "grep"))
+                                           ,(dirname (which "perl"))
+                                           ,(dirname (which "uname"))))))
+                  (find-files out ".")))
+              #t))
          (add-after 'wrap-program 'check
            (lambda* (#:key outputs #:allow-other-keys)
              (invoke (string-append (assoc-ref outputs "out")
@@ -2781,7 +2830,8 @@ quantitative phenotypes.")
                      "-filter" "-help")
              #t)))))
     (inputs
-     `(("perl-html-parser" ,perl-html-parser)
+     `(("edirect-go-programs" ,edirect-go-programs)
+       ("perl-html-parser" ,perl-html-parser)
        ("perl-encode-locale" ,perl-encode-locale)
        ("perl-file-listing" ,perl-file-listing)
        ("perl-html-tagset" ,perl-html-tagset)
@@ -2811,8 +2861,59 @@ EDirect also provides an argument-driven function that simplifies the
 extraction of data from document summaries or other results that are returned
 in structured XML format.  This can eliminate the need for writing custom
 software to answer ad hoc questions.")
+    (native-search-paths
+     ;; Ideally this should be set for LWP somewhere.
+     (list (search-path-specification
+            (variable "PERL_LWP_SSL_CA_FILE")
+            (file-type 'regular)
+            (separator #f)
+            (files '("/etc/ssl/certs/ca-certificates.crt")))))
     (license license:public-domain)))
 
+(define-public edirect-go-programs
+  (package
+    (inherit edirect)
+    (name "edirect-go-programs")
+    (build-system go-build-system)
+    (arguments
+     `(#:install-source? #f
+       #:tests? #f      ; No tests.
+       #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
+       #:phases
+       (modify-phases %standard-phases
+         (replace 'build
+           (lambda* (#:key import-path #:allow-other-keys)
+             (with-directory-excursion (string-append "src/" import-path)
+               (invoke "go" "build" "-v" "-x" "j2x.go")
+               (invoke "go" "build" "-v" "-x" "t2x.go")
+               (invoke "go" "build" "-v" "-x" "-o"
+                       "xtract.Linux" "xtract.go" "common.go")
+               (invoke "go" "build" "-v" "-x" "-o"
+                       "rchive.Linux" "rchive.go" "common.go")
+               (invoke "go" "build" "-v" "-x" "-o" "symbols.Linux" "s2p.go"))))
+         (replace 'install
+           (lambda* (#:key outputs import-path #:allow-other-keys)
+             (let ((dest    (string-append (assoc-ref outputs "out") "/bin"))
+                   (source  (string-append "src/" import-path "/")))
+               (for-each (lambda (file)
+                           (format #t "installing ~a~%" file)
+                           (install-file (string-append source file) dest))
+                         '("j2x" "t2x" "symbols.Linux" "xtract.Linux" "rchive.Linux"))
+               #t))))))
+    (native-inputs '())
+    (propagated-inputs '())
+    (inputs
+     `(("go-github-com-fatih-color" ,go-github-com-fatih-color)
+       ("go-github-com-fogleman-gg" ,go-github-com-fogleman-gg)
+       ("go-github-com-gedex-inflector" ,go-github-com-gedex-inflector)
+       ("go-github-com-golang-freetype" ,go-github-com-golang-freetype)
+       ("go-github-com-klauspost-cpuid" ,go-github-com-klauspost-cpuid)
+       ("go-github-com-pbnjay-memory" ,go-github-com-pbnjay-memory)
+       ("go-github-com-surgebase-porter2" ,go-github-com-surgebase-porter2)
+       ("go-golang-org-rainycape-unidecode" ,go-golang-org-rainycape-unidecode)
+       ("go-golang-org-x-image" ,go-golang-org-x-image)
+       ("go-golang-org-x-text" ,go-golang-org-x-text)))))
+
 (define-public exonerate
   (package
     (name "exonerate")
@@ -6309,16 +6410,16 @@ application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
 (define-public star
   (package
     (name "star")
-    (version "2.7.1a")
+    (version "2.7.3a")
     (source (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/alexdobin/STAR.git")
                     (commit version)))
-              (file-name (string-append name "-" version "-checkout"))
+              (file-name (git-file-name name version))
               (sha256
                (base32
-                "0n6g4s4hgw7qygs1z97j7a2dgz8gfaa4cv5pjvvvmarvk0x07hyg"))
+                "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
               (modules '((guix build utils)))
               (snippet
                '(begin
@@ -6536,31 +6637,56 @@ Cuffdiff or Ballgown programs.")
 (define-public taxtastic
   (package
     (name "taxtastic")
-    (version "0.8.5")
+    (version "0.8.11")
     (source (origin
-              (method url-fetch)
-              (uri (pypi-uri "taxtastic" version))
+              ;; The Pypi version does not include tests.
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/fhcrc/taxtastic.git")
+                    (commit (string-append "v" version))))
+              (file-name (git-file-name name version))
               (sha256
                (base32
-                "03pysw79lsrvz4lwzis88j15067ffqbi4cid5pqhrlxmd6bh8rrk"))))
+                "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
     (build-system python-build-system)
     (arguments
-     `(#:python ,python-2
-       #:phases
+     `(#:phases
        (modify-phases %standard-phases
+         (add-after 'unpack 'prepare-directory
+           (lambda _
+             ;; The git checkout must be writable for tests.
+             (for-each make-file-writable (find-files "."))
+             ;; This test fails, but the error is not caught by the test
+             ;; framework, so the tests fail...
+             (substitute* "tests/test_taxit.py"
+               (("self.cmd_fails\\(''\\)")
+                "self.cmd_fails('nothing')"))
+             ;; This version file is expected to be created with git describe.
+             (mkdir-p "taxtastic/data")
+             (with-output-to-file "taxtastic/data/ver"
+               (lambda () (display ,version)))
+             #t))
+         (add-after 'unpack 'python37-compatibility
+           (lambda _
+             (substitute* "taxtastic/utils.py"
+               (("import csv") "import csv, errno")
+               (("os.errno") "errno"))
+             #t))
          (replace 'check
-           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t)))))
+           ;; Note, this fails to run with "-v" as it tries to write to a
+           ;; closed output stream.
+           (lambda _ (invoke "python" "-m" "unittest") #t)))))
     (propagated-inputs
-     `(("python-sqlalchemy" ,python2-sqlalchemy)
-       ("python-decorator" ,python2-decorator)
-       ("python-biopython" ,python2-biopython)
-       ("python-pandas" ,python2-pandas)
-       ("python-psycopg2" ,python2-psycopg2)
-       ("python-fastalite" ,python2-fastalite)
-       ("python-pyyaml" ,python2-pyyaml)
-       ("python-six" ,python2-six)
-       ("python-jinja2" ,python2-jinja2)
-       ("python-dendropy" ,python2-dendropy)))
+     `(("python-sqlalchemy" ,python-sqlalchemy)
+       ("python-decorator" ,python-decorator)
+       ("python-biopython" ,python-biopython)
+       ("python-pandas" ,python-pandas)
+       ("python-psycopg2" ,python-psycopg2)
+       ("python-fastalite" ,python-fastalite)
+       ("python-pyyaml" ,python-pyyaml)
+       ("python-six" ,python-six)
+       ("python-jinja2" ,python-jinja2)
+       ("python-dendropy" ,python-dendropy)))
     (home-page "https://github.com/fhcrc/taxtastic")
     (synopsis "Tools for taxonomic naming and annotation")
     (description
@@ -8253,7 +8379,7 @@ throughput genetic sequencing data sets using regression methods.")
 (define-public r-qtl
  (package
   (name "r-qtl")
-  (version "1.45-11")
+  (version "1.46-2")
   (source
    (origin
     (method url-fetch)
@@ -8261,7 +8387,7 @@ throughput genetic sequencing data sets using regression methods.")
                         version ".tar.gz"))
     (sha256
      (base32
-      "1d6qgj602fm6zia3djl4hmca0ri4v57ffp3g93p2yc3cabx2hq90"))))
+      "0rbwcnvyy96gq1dsgpxx03pv423qya26h6ws5y0blj3blfdmj83a"))))
   (build-system r-build-system)
   (home-page "https://rqtl.org/")
   (synopsis "R package for analyzing QTL experiments in genetics")
@@ -9091,6 +9217,8 @@ number detection tools.")
        ("r-rtracklayer" ,r-rtracklayer)
        ("r-s4vectors" ,r-s4vectors)
        ("r-zlibbioc" ,r-zlibbioc)))
+    (native-inputs
+     `(("r-knitr" ,r-knitr))) ; for vignettes
     (inputs
      `(("zlib" ,zlib)))
     (home-page "https://github.com/al2na/methylKit")
@@ -9360,6 +9488,8 @@ analysis.")
        ("r-ggplot2" ,r-ggplot2)
        ("r-lattice" ,r-lattice)
        ("r-limma" ,r-limma)))
+    (native-inputs
+     `(("r-knitr" ,r-knitr))) ; for vignettes
     (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
     (synopsis "Variance stabilization and calibration for microarray data")
     (description
@@ -12111,8 +12241,8 @@ reading, writing, and exporting phylogenetic trees.")
     (version "1.005")
     (source (origin
               (method url-fetch)
-              (uri (string-append "http://search.maven.org/remotecontent?"
-                                  "filepath=org/biojava/thirdparty/forester/"
+              (uri (string-append "https://repo1.maven.org/maven2/"
+                                  "org/biojava/thirdparty/forester/"
                                   version "/forester-" version "-sources.jar"))
               (file-name (string-append name "-" version ".jar"))
               (sha256
@@ -12188,7 +12318,8 @@ reading, writing, and exporting phylogenetic trees.")
            (method url-fetch)
            (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                                "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
-                               "forester/java/classes/resources/synth_look_and_feel_1.xml"))
+                               "forester/java/classes/resources/"
+                               "synth_look_and_feel_1.xml"))
            (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
            (sha256
             (base32
@@ -12772,7 +12903,7 @@ methylation and segmentation.")
 (define-public pigx-scrnaseq
   (package
     (name "pigx-scrnaseq")
-    (version "1.1.3")
+    (version "1.1.4")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
@@ -12780,7 +12911,7 @@ methylation and segmentation.")
                                   "/pigx_scrnaseq-" version ".tar.gz"))
               (sha256
                (base32
-                "0ga2jr4968qzwml6aycky4603q64lny3y7lzw6dmafch5pydl1qi"))))
+                "1d5l3gywypi67yz9advxq5xkgfhr4733gj0bwnngm723i3hdf5w9"))))
     (build-system gnu-build-system)
     (inputs
      `(("coreutils" ,coreutils)
@@ -13480,14 +13611,14 @@ bgzipped text file that contains a pair of genomic coordinates per line.")
 (define-public python-pyfaidx
   (package
     (name "python-pyfaidx")
-    (version "0.5.7")
+    (version "0.5.8")
     (source
      (origin
        (method url-fetch)
        (uri (pypi-uri "pyfaidx" version))
        (sha256
         (base32
-         "02jvdx3ksy6w5gd29i1d0g0zsabbz7c14qg482ff7pza6sdl0b2i"))))
+         "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
     (build-system python-build-system)
     (propagated-inputs
      `(("python-six" ,python-six)))
@@ -13498,6 +13629,9 @@ bgzipped text file that contains a pair of genomic coordinates per line.")
 fasta subsequences.")
     (license license:bsd-3)))
 
+(define-public python2-pyfaidx
+  (package-with-python2 python-pyfaidx))
+
 (define-public python-cooler
   (package
     (name "python-cooler")
@@ -13954,7 +14088,7 @@ datasets.")
 (define-public ngless
   (package
     (name "ngless")
-    (version "1.0.1")
+    (version "1.1.0")
     (source
      (origin
        (method git-fetch)
@@ -13964,7 +14098,7 @@ datasets.")
        (file-name (git-file-name name version))
        (sha256
         (base32
-         "06ygv8q2zjqsnrid1302yrlhhvb8ik48nq6n0higk3i1mdc8r0dg"))))
+         "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
     (build-system haskell-build-system)
     (arguments
      `(#:haddock? #f ; The haddock phase fails with: NGLess/CmdArgs.hs:20:1:
@@ -14424,6 +14558,8 @@ repeated areas between contigs.")
         (base32
          "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
     (build-system python-build-system)
+    (native-inputs
+     `(("python-joblib" ,python-joblib)))
     (propagated-inputs
      `(("python-click" ,python-click)
        ("python-cython" ,python-cython)