gnu: ncbi-vdb: Update to 2.5.4.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
index fdb4256..0ba0910 100644 (file)
@@ -44,6 +44,7 @@
   #:use-module (gnu packages linux)
   #:use-module (gnu packages machine-learning)
   #:use-module (gnu packages maths)
+  #:use-module (gnu packages mpi)
   #:use-module (gnu packages ncurses)
   #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
@@ -524,6 +525,89 @@ confidence to have in an alignment.")
                    license:lgpl2.0+
                    license:asl2.0))))
 
+(define-public bless                     ; package definition for BLESS
+  (package
+    (name "bless")
+    (version "1p02")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
+                                  version ".tgz"))
+              (sha256
+               (base32
+                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
+              (modules '((guix build utils))) ; for delete-file-recursively
+              (snippet
+               `(begin
+                  ;; Remove bundled boost, pigz, zlib, and the .git directory.
+                  ;; FIXME: also remove bundled sources for google-sparsehash,
+                  ;; murmurhash3, kmc once packaged.
+                  (delete-file-recursively "boost")
+                  (delete-file-recursively "pigz")
+                  (delete-file-recursively "zlib")
+                  (delete-file-recursively ".git")
+                  #t))))
+    (build-system gnu-build-system)
+    (arguments
+     '(#:tests? #f ;no "check" target
+       #:make-flags
+       (list (string-append "ZLIB="      ; path to libz.a in the zlib input
+                            (assoc-ref %build-inputs "zlib")
+                            "/lib/libz.a")
+             (string-append "LDFLAGS="   ; link against the packaged libraries
+                            (string-join '("-lboost_filesystem"
+                                           "-lboost_system"
+                                           "-lboost_iostreams"
+                                           "-lz"
+                                           "-fopenmp"
+                                           "-std=c++11"))))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'do-not-build-bundled-pigz
+          (lambda _                      ; bundled pigz is deleted by the snippet
+            (substitute* "Makefile"
+              (("cd pigz/pigz-2.3.3; make") ""))
+            #t))
+         (add-after 'unpack 'patch-paths-to-executables
+          (lambda* (#:key inputs outputs #:allow-other-keys)
+            (substitute* "parse_args.cpp"
+              (("kmc_binary = .*")       ; kmc is installed into our own output
+               (string-append "kmc_binary = \""
+                              (assoc-ref outputs "out")
+                              "/bin/kmc\";"))
+              (("pigz_binary = .*")      ; pigz comes from the pigz input
+               (string-append "pigz_binary = \""
+                              (assoc-ref inputs "pigz")
+                              "/bin/pigz\";")))
+            #t))
+         (replace 'install               ; copy both executables to <out>/bin/
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
+              (for-each (lambda (file)
+                          (install-file file bin))
+                        '("bless" "kmc/bin/kmc"))
+              #t)))
+         (delete 'configure))))          ; presumably no configure script
+    (native-inputs
+     `(("perl" ,perl)))
+    (inputs
+     `(("openmpi" ,openmpi)
+       ("boost" ,boost)
+       ("pigz" ,pigz)
+       ("zlib" ,zlib)))
+    (supported-systems '("x86_64-linux"))
+    (home-page "http://sourceforge.net/p/bless-ec/wiki/Home/")
+    (synopsis "Bloom-filter-based error correction tool for NGS reads")
+    (description
+     "@dfn{Bloom-filter-based error correction solution for high-throughput
+sequencing reads} (BLESS) is a correction tool for genomic reads produced by
+@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
+filter.  BLESS produces accurate correction results with much less memory
+compared with previous solutions and is also able to tolerate a higher
+false-positive rate.  BLESS can extend reads like DNA assemblers to correct
+errors at the end of reads.")
+    (license license:gpl3+)))
+
 (define-public bowtie
   (package
     (name "bowtie")
@@ -1090,6 +1174,51 @@ analysis (from RNA-Seq), transcription factor binding quantification in
 ChIP-Seq, and analysis of metagenomic data.")
     (license license:artistic2.0)))
 
+(define-public express-beta-diversity   ; package for ExpressBetaDiversity
+  (package
+   (name "express-beta-diversity")
+   (version "1.0.7")
+   (source (origin
+             (method url-fetch)
+             (uri
+              (string-append
+               "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
+               version ".tar.gz"))
+             (file-name (string-append name "-" version ".tar.gz")) ; name-version.tar.gz
+             (sha256
+              (base32
+               "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
+   (build-system gnu-build-system)
+   (arguments
+    `(#:phases
+      (modify-phases %standard-phases
+        (delete 'configure)              ; presumably no configure script
+        (add-before 'build 'enter-source (lambda _ (chdir "source") #t)) ; build runs in source/
+        (replace 'check                  ; run the built binary with "-u"
+                 (lambda _ (zero? (system* "../bin/ExpressBetaDiversity"
+                                           "-u")))) ; presumably the unit-test flag; verify
+        (add-after 'check 'exit-source (lambda _ (chdir "..") #t)) ; back to top level
+        (replace 'install                ; copy script and binary to <out>/bin
+                 (lambda* (#:key outputs #:allow-other-keys)
+                   (let ((bin (string-append (assoc-ref outputs "out")
+                                             "/bin")))
+                     (mkdir-p bin)       ; copy-file needs the directory to exist
+                     (copy-file "scripts/convertToEBD.py"
+                                (string-append bin "/convertToEBD.py"))
+                     (copy-file "bin/ExpressBetaDiversity"
+                                (string-append bin "/ExpressBetaDiversity"))
+                     #t))))))
+   (inputs
+    `(("python" ,python-2)))             ; presumably for convertToEBD.py; verify
+   (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
+   (synopsis "Taxon- and phylogenetic-based beta diversity measures")
+   (description
+    "Express Beta Diversity (EBD) calculates ecological beta diversity
+(dissimilarity) measures between biological communities.  EBD implements a
+variety of diversity measures including those that make use of phylogenetic
+similarity of community members.")
+   (license license:gpl3+)))
+
 (define-public fasttree
   (package
    (name "fasttree")
@@ -1329,6 +1458,9 @@ estimates transcript expression.")
      `(("perl" ,perl)
        ("python" ,python)
        ("zlib" ,zlib)))
+    ;; Non-portable SSE instructions are used so building fails on platforms
+    ;; other than x86_64.
+    (supported-systems '("x86_64-linux"))
     (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
     (synopsis "Hierarchical indexing for spliced alignment of transcripts")
     (description
@@ -1383,9 +1515,11 @@ HMMs).")
                 "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
     (build-system python-build-system)
     (arguments `(#:python ,python-2)) ; only Python 2 is supported
-    (inputs
-     `(("python-numpy" ,python2-numpy)
-       ("python-setuptools" ,python2-setuptools)))
+    ;; Numpy needs to be propagated when htseq is used as a Python library.
+    (propagated-inputs
+     `(("python-numpy" ,python2-numpy)))
+    (native-inputs
+     `(("python-setuptools" ,python2-setuptools)))
     (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
     (synopsis "Analysing high-throughput sequencing data with Python")
     (description
@@ -2184,6 +2318,7 @@ viewer.")
       (inputs
        `(("perl" ,perl)
          ("zlib" ,zlib)))
+      (supported-systems '("x86_64-linux"))
       (home-page "https://code.google.com/p/mosaik-aligner/")
       (synopsis "Map nucleotide sequence reads to reference genomes")
       (description
@@ -2200,7 +2335,7 @@ Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
 (define-public ngs-sdk
   (package
     (name "ngs-sdk")
-    (version "1.1.1")
+    (version "1.2.2")
     (source
      (origin
        (method url-fetch)
@@ -2210,7 +2345,7 @@ Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
        (file-name (string-append name "-" version ".tar.gz"))
        (sha256
         (base32
-         "1x58gpm574n0xmk2a98gmikbgycq78ia0bvnb42k5ck34fmd5v8y"))))
+         "0rvq61zfw2h9jcz6a33b9xrl20r7s5a9rldvv6rs2qy42khpmf5j"))))
     (build-system gnu-build-system)
     (arguments
      `(#:parallel-build? #f ; not supported
@@ -2254,26 +2389,8 @@ simultaneously.")
                           (srfi srfi-26))
                          ,@(package-arguments ngs-sdk))
            ((#:phases phases)
-            `(alist-cons-after
-              'enter-dir 'fix-java-symlink-installation
-              (lambda _
-                ;; Only replace the version suffix, not the version number in
-                ;; the directory name.  Reported here:
-                ;; https://github.com/ncbi/ngs/pull/4
-                (substitute* "Makefile.java"
-                  (((string-append "\\$\\(subst "
-                                   "(\\$\\(VERSION[^\\)]*\\)),"
-                                   "(\\$\\([^\\)]+\\)),"
-                                   "(\\$\\([^\\)]+\\)|\\$\\@)"
-                                   "\\)")
-                    _ pattern replacement target)
-                   (string-append "$(patsubst "
-                                  "%" pattern ","
-                                  "%" replacement ","
-                                  target ")"))))
-              (alist-replace
-               'enter-dir (lambda _ (chdir "ngs-java") #t)
-               ,phases))))))
+            `(modify-phases ,phases
+               (replace 'enter-dir (lambda _ (chdir "ngs-java") #t)))))))
     (inputs
      `(("jdk" ,icedtea6 "jdk")
        ("ngs-sdk" ,ngs-sdk)))
@@ -2282,7 +2399,7 @@ simultaneously.")
 (define-public ncbi-vdb
   (package
     (name "ncbi-vdb")
-    (version "2.4.5-5")
+    (version "2.5.4")
     (source
      (origin
        (method url-fetch)
@@ -2292,7 +2409,7 @@ simultaneously.")
        (file-name (string-append name "-" version ".tar.gz"))
        (sha256
         (base32
-         "1cj8nk6if8sqagv20vx36v566fdvhcaadf0x1ycnbgql6chbs6vy"))))
+         "1rcnyc4xkdfcjww2i0s0qrbapys0cxbjcx2sy3qkpslf9f400fgj"))))
     (build-system gnu-build-system)
     (arguments
      `(#:parallel-build? #f ; not supported
@@ -2302,20 +2419,6 @@ simultaneously.")
         'configure
         (lambda* (#:key inputs outputs #:allow-other-keys)
           (let ((out (assoc-ref outputs "out")))
-            ;; Only replace the version suffix, not the version number in the
-            ;; directory name; fixed in commit 4dbba5c6a809 (no release yet).
-            (substitute* "setup/konfigure.perl"
-              (((string-append "\\$\\(subst "
-                               "(\\$\\(VERSION[^\\)]*\\)),"
-                               "(\\$\\([^\\)]+\\)),"
-                               "(\\$\\([^\\)]+\\)|\\$\\@)"
-                               "\\)")
-                _ pattern replacement target)
-               (string-append "$(patsubst "
-                              "%" pattern ","
-                              "%" replacement ","
-                              target ")")))
-
             ;; Override include path for libmagic
             (substitute* "setup/package.prl"
               (("name => 'magic', Include => '/usr/include'")