gnu: ncbi-vdb: Update to 2.5.4.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
index 4c4eedb..0ba0910 100644 (file)
@@ -1,5 +1,8 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2015 Ben Woodcroft <donttrustben@gmail.com>
+;;; Copyright © 2015 Pjotr Prins <pjotr.guix@thebird.nl>
+;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
   #:use-module (guix git-download)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system cmake)
+  #:use-module (guix build-system perl)
   #:use-module (guix build-system python)
+  #:use-module (guix build-system r)
+  #:use-module (guix build-system ruby)
   #:use-module (guix build-system trivial)
   #:use-module (gnu packages)
+  #:use-module (gnu packages autotools)
+  #:use-module (gnu packages algebra)
   #:use-module (gnu packages base)
+  #:use-module (gnu packages boost)
   #:use-module (gnu packages compression)
+  #:use-module (gnu packages cpio)
+  #:use-module (gnu packages file)
   #:use-module (gnu packages java)
+  #:use-module (gnu packages linux)
+  #:use-module (gnu packages machine-learning)
   #:use-module (gnu packages maths)
+  #:use-module (gnu packages mpi)
   #:use-module (gnu packages ncurses)
   #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages popt)
+  #:use-module (gnu packages protobuf)
   #:use-module (gnu packages python)
+  #:use-module (gnu packages ruby)
   #:use-module (gnu packages statistics)
-  #:use-module (gnu packages swig)
   #:use-module (gnu packages tbb)
   #:use-module (gnu packages textutils)
+  #:use-module (gnu packages tls)
   #:use-module (gnu packages vim)
+  #:use-module (gnu packages web)
   #:use-module (gnu packages xml)
-  #:use-module (gnu packages zip))
+  #:use-module (gnu packages zip)
+  #:use-module (srfi srfi-1))
+
+;; ARAGORN gene finder.  The build compiles one C file directly with gcc and
+;; installs the resulting binary and the man page by hand.
+(define-public aragorn
+  (package
+    (name "aragorn")
+    (version "1.2.36")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
+                    version ".tgz"))
+              (sha256
+               (base32
+                "1dg7jlz1qpqy88igjxd6ncs11ccsirb36qv1z01a0np4i4jh61mb"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ; there are no tests
+       #:phases
+       (modify-phases %standard-phases
+         ;; The configure phase is dropped; gcc is invoked directly below.
+         (delete 'configure)
+         (replace 'build
+           (lambda _
+             ;; Compile the versioned source file into the "aragorn" binary;
+             ;; the phase succeeds only when gcc exits with status 0.
+             (let ((source-file (string-append "aragorn" ,version ".c")))
+               (zero? (system* "gcc" "-O3" "-ffast-math" "-finline-functions"
+                               "-o" "aragorn" source-file)))))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((prefix (assoc-ref outputs "out")))
+               ;; 'install-file' creates the target directory as needed and
+               ;; copies the file into it under its own base name.
+               (install-file "aragorn" (string-append prefix "/bin"))
+               (install-file "aragorn.1"
+                             (string-append prefix "/share/man/man1")))
+             #t)))))
+    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
+    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
+    (description
+     "Aragorn identifies transfer RNA, mitochondrial RNA and
+transfer-messenger RNA from nucleotide sequences, based on homology to known
+tRNA consensus sequences and RNA structure.  It also outputs the secondary
+structure of the predicted RNA.")
+    (license license:gpl2)))
+
+;; BamTools: C++ library and command-line tools for BAM files, built with
+;; CMake and linked against zlib.
+(define-public bamtools
+  (package
+    (name "bamtools")
+    (version "2.3.0")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://github.com/pezmaster31/bamtools/archive/v"
+                    version ".tar.gz"))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:tests? #f ;no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         (add-before
+          'configure 'set-ldflags
+          (lambda* (#:key outputs #:allow-other-keys)
+            ;; Add <out>/lib/bamtools to the run-time library search path of
+            ;; everything linked during the build.
+            (setenv "LDFLAGS"
+                    (string-append
+                     "-Wl,-rpath="
+                     (assoc-ref outputs "out") "/lib/bamtools"))
+            ;; Report success explicitly instead of relying on the
+            ;; unspecified return value of 'setenv'.
+            #t)))))
+    (inputs `(("zlib" ,zlib)))
+    (home-page "https://github.com/pezmaster31/bamtools")
+    (synopsis "C++ API and command-line toolkit for working with BAM data")
+    (description
+     "BamTools provides both a C++ API and a command-line toolkit for handling
+BAM files.")
+    (license license:expat)))
 
 (define-public bedops
   (package
     (name "bedops")
-    (version "2.4.5")
+    (version "2.4.14")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/bedops/bedops/archive/v"
               (file-name (string-append name "-" version ".tar.gz"))
               (sha256
                (base32
-                "0wmg6j0icimlrnsidaxrzf3hfgjvlkkcwvpdg7n4gg7hdv2m9ni5"))))
+                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
     (build-system gnu-build-system)
     (arguments
      '(#:tests? #f
@@ -105,7 +204,7 @@ computational cluster.")
 (define-public bedtools
   (package
     (name "bedtools")
-    (version "2.22.0")
+    (version "2.24.0")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
@@ -113,7 +212,8 @@ computational cluster.")
               (file-name (string-append name "-" version ".tar.gz"))
               (sha256
                (base32
-                "16aq0w3dmbd0853j32xk9jin4vb6v6fgakfyvrsmsjizzbn3fpfl"))))
+                "0lnxrjvs3nnmb4bmskag1wg3h2hd80przz5q3xd0bvs7vyxrvpbl"))
+              (patches (list (search-patch "bedtools-32bit-compilation.patch")))))
     (build-system gnu-build-system)
     (native-inputs `(("python" ,python-2)))
     (inputs `(("samtools" ,samtools)
@@ -136,9 +236,8 @@ computational cluster.")
           'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
-              (mkdir-p bin)
               (for-each (lambda (file)
-                          (copy-file file (string-append bin (basename file))))
+                          (install-file file bin))
                         (find-files "bin" ".*"))))
           %standard-phases)))))
     (home-page "https://github.com/arq5x/bedtools2")
@@ -185,6 +284,67 @@ pybedtools extends BEDTools by offering feature-level manipulations from with
 Python.")
     (license license:gpl2+)))
 
+(define-public bioperl-minimal ; BioPerl; installed *.pl scripts are wrapped with PERL5LIB
+  (let* ((inputs `(("perl-module-build" ,perl-module-build) ; reused below as the 'inputs' field
+                   ("perl-data-stag" ,perl-data-stag)
+                   ("perl-libwww" ,perl-libwww)
+                   ("perl-uri" ,perl-uri)))
+         (transitive-inputs ; package names of all transitive target inputs of INPUTS, deduplicated
+          (map (compose package-name cadr)
+               (delete-duplicates
+                (concatenate
+                 (map (compose package-transitive-target-inputs cadr) inputs))))))
+    (package
+      (name "bioperl-minimal")
+      (version "1.6.924")
+      (source
+       (origin
+         (method url-fetch)
+         (uri (string-append "mirror://cpan/authors/id/C/CJ/CJFIELDS/BioPerl-"
+                             version ".tar.gz"))
+         (sha256
+          (base32
+           "1l3npcvvvwjlhkna9dndpfv1hklhrgva013kw96m0n1wpd37ask1"))))
+      (build-system perl-build-system)
+      (arguments
+       `(#:phases
+         (modify-phases %standard-phases
+           (add-after
+            'install 'wrap-programs
+            (lambda* (#:key outputs #:allow-other-keys)
+              ;; Make sure all executables in "bin" find the required Perl
+              ;; modules at runtime.  As the PERL5LIB variable contains also
+              ;; the paths of native inputs, we pick the transitive target
+              ;; inputs from %build-inputs.
+              (let* ((out  (assoc-ref outputs "out"))
+                     (bin  (string-append out "/bin/"))
+                     (path (string-join ; colon-separated value for PERL5LIB
+                            (cons (string-append out "/lib/perl5/site_perl")
+                                  (map (lambda (name)
+                                         (assoc-ref %build-inputs name)) ; looked up by package name
+                                       ',transitive-inputs)) ; list computed on the host side, spliced in as a literal
+                            ":")))
+                (for-each (lambda (file)
+                            (wrap-program file
+                              `("PERL5LIB" ":" prefix (,path))))
+                          (find-files bin "\\.pl$")) ; only files whose name ends in ".pl" are wrapped
+                #t))))))
+      (inputs inputs)
+      (native-inputs
+       `(("perl-test-most" ,perl-test-most)))
+      (home-page "http://search.cpan.org/dist/BioPerl")
+      (synopsis "Bioinformatics toolkit")
+      (description
+       "BioPerl is the product of a community effort to produce Perl code which
+is useful in biology.  Examples include Sequence objects, Alignment objects
+and database searching objects.  These objects not only do what they are
+advertised to do in the documentation, but they also interact - Alignment
+objects are made from the Sequence objects, Sequence objects have access to
+Annotation and SeqFeature objects and databases, Blast objects can be
+converted to Alignment objects, and so on.  This means that the objects
+provide a coordinated and extensible framework to do computational biology.")
+      (license (package-license perl))))) ; reuse the license field of the perl package
+
 (define-public python-biopython
   (package
     (name "python-biopython")
@@ -218,6 +378,236 @@ into separate processes; and more.")
     (inputs
      `(("python2-numpy" ,python2-numpy)))))
 
+(define-public blast+ ; NCBI BLAST+; built from the "c++" subdirectory of the tarball
+  (package
+    (name "blast+")
+    (version "2.2.31")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
+                    version "/ncbi-blast-" version "+-src.tar.gz"))
+              (sha256
+               (base32
+                "19gq6as4k1jrgsd26158ads6h7v4jca3h4r5dzg1y0m6ya50x5ph"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin
+                  ;; Remove bundled bzip2 and zlib
+                  (delete-file-recursively "c++/src/util/compress/bzip2")
+                  (delete-file-recursively "c++/src/util/compress/zlib")
+                  (substitute* "c++/src/util/compress/Makefile.in"
+                    (("bzip2 zlib api") "api")) ; stop the Makefile from descending into the deleted directories
+                  ;; Remove useless msbuild directory
+                  (delete-file-recursively
+                   "c++/src/build-system/project_tree_builder/msbuild")
+                  #t))))
+    (build-system gnu-build-system)
+    (arguments
+     `(;; There are three(!) tests for this massive library, and all fail with
+       ;; "unparsable timing stats".
+       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
+       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
+       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
+       #:tests? #f
+       #:out-of-source? #t
+       #:parallel-build? #f ; not supported
+       #:phases
+       (modify-phases %standard-phases
+         (add-before
+          'configure 'set-HOME
+          ;; $HOME needs to be set at some point during the configure phase
+          (lambda _ (setenv "HOME" "/tmp") #t))
+         (add-after
+          'unpack 'enter-dir
+          (lambda _ (chdir "c++") #t)) ; the paths used by the following phases are relative to "c++"
+         (add-after
+          'enter-dir 'fix-build-system
+          (lambda _
+            (define (which* cmd) ; replacement text for CMD, or #f (after a warning) when CMD is not found
+              (cond ((string=? cmd "date")
+                     ;; make call to "date" deterministic
+                     "date -d @0")
+                    ((which cmd)
+                     => identity)
+                    (else
+                     (format (current-error-port)
+                             "WARNING: Unable to find absolute path for ~s~%"
+                             cmd)
+                     #f)))
+
+            ;; Rewrite hardcoded paths to various tools
+            (substitute* (append '("src/build-system/configure.ac"
+                                   "src/build-system/configure"
+                                   "scripts/common/impl/if_diff.sh"
+                                   "scripts/common/impl/run_with_lock.sh"
+                                   "src/build-system/Makefile.configurables.real"
+                                   "src/build-system/Makefile.in.top"
+                                   "src/build-system/Makefile.meta.gmake=no"
+                                   "src/build-system/Makefile.meta.in"
+                                   "src/build-system/Makefile.meta_l"
+                                   "src/build-system/Makefile.meta_p"
+                                   "src/build-system/Makefile.meta_r"
+                                   "src/build-system/Makefile.mk.in"
+                                   "src/build-system/Makefile.requirements"
+                                   "src/build-system/Makefile.rules_with_autodep.in")
+                                 (find-files "scripts/common/check" "\\.sh$"))
+              (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
+               (or (which* cmd) all))) ; keep the matched text unchanged when no replacement is available
+
+            (substitute* (find-files "src/build-system" "^config.*")
+              (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
+              (("^PATH=.*") "")) ; drop lines that assign PATH
+
+            ;; rewrite "/var/tmp" in check script
+            (substitute* "scripts/common/check/check_make_unix.sh"
+              (("/var/tmp") "/tmp"))
+
+            ;; do not reset PATH
+            (substitute* (find-files "scripts/common/impl/" "\\.sh$")
+              (("^ *PATH=.*") "")
+              (("action=/bin/") "action=")
+              (("export PATH") ":")) ; ":" is the shell's no-op command
+            #t))
+         (replace
+          'configure
+          (lambda* (#:key inputs outputs #:allow-other-keys)
+            (let ((out     (assoc-ref outputs "out"))
+                  (lib     (string-append (assoc-ref outputs "lib") "/lib")) ; libraries go to the "lib" output
+                  (include (string-append (assoc-ref outputs "include") ; headers go to the "include" output
+                                          "/include/ncbi-tools++")))
+              ;; The 'configure' script doesn't recognize things like
+              ;; '--enable-fast-install'.
+              (zero? (system* "./configure.orig"
+                              (string-append "--with-build-root=" (getcwd) "/build")
+                              (string-append "--prefix=" out)
+                              (string-append "--libdir=" lib)
+                              (string-append "--includedir=" include)
+                              (string-append "--with-bz2="
+                                             (assoc-ref inputs "bzip2"))
+                              (string-append "--with-z="
+                                             (assoc-ref inputs "zlib"))
+                              ;; Each library is built twice by default, once
+                              ;; with "-static" in its name, and again
+                              ;; without.
+                              "--without-static"
+                              "--with-dll"))))))))
+    (outputs '("out"       ;  19 MB
+               "lib"       ; 203 MB
+               "include")) ;  32 MB
+    (inputs
+     `(("bzip2" ,bzip2)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("cpio" ,cpio)))
+    (home-page "http://blast.ncbi.nlm.nih.gov")
+    (synopsis "Basic local alignment search tool")
+    (description
+     "BLAST is a popular method of performing a DNA or protein sequence
+similarity search, using heuristics to produce results quickly.  It also
+calculates an “expect value” that estimates how many matches would have
+occurred at a given score by chance, which can aid a user in judging how much
+confidence to have in an alignment.")
+    ;; Most of the sources are in the public domain, with the following
+    ;; exceptions:
+    ;;   * Expat:
+    ;;     * ./c++/include/util/bitset/
+    ;;     * ./c++/src/html/ncbi_menu*.js
+    ;;   * Boost license:
+    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
+    ;;   * LGPL 2+:
+    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
+    ;;   * ASL 2.0:
+    ;;     * ./c++/src/corelib/teamcity_*
+    (license (list license:public-domain
+                   license:expat
+                   license:boost1.0
+                   license:lgpl2.0+
+                   license:asl2.0))))
+
+;; BLESS: error correction for sequencing reads.  The bundled boost, pigz and
+;; zlib copies are removed from the source; the build links against the
+;; packaged libraries and points the program at the packaged pigz binary.
+(define-public bless
+  (package
+    (name "bless")
+    (version "1p02")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
+                                  version ".tgz"))
+              (sha256
+               (base32
+                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
+              (modules '((guix build utils)))
+              (snippet
+               `(begin
+                  ;; Remove bundled boost, pigz, zlib, and .git directory
+                  ;; FIXME: also remove bundled sources for google-sparsehash,
+                  ;; murmurhash3, kmc once packaged.
+                  (delete-file-recursively "boost")
+                  (delete-file-recursively "pigz")
+                  (delete-file-recursively "zlib")
+                  (delete-file-recursively ".git")
+                  #t))))
+    (build-system gnu-build-system)
+    (arguments
+     '(#:tests? #f ;no "check" target
+       #:make-flags
+       ;; Point the Makefile at the packaged static zlib and pass the
+       ;; libraries and flags needed at link time.
+       (list (string-append "ZLIB="
+                            (assoc-ref %build-inputs "zlib")
+                            "/lib/libz.a")
+             (string-append "LDFLAGS="
+                            (string-join '("-lboost_filesystem"
+                                           "-lboost_system"
+                                           "-lboost_iostreams"
+                                           "-lz"
+                                           "-fopenmp"
+                                           "-std=c++11"))))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'do-not-build-bundled-pigz
+          ;; The bundled pigz sources were deleted in the snippet, so drop
+          ;; the Makefile command that would try to build them.  This phase
+          ;; needs none of the phase keyword arguments.
+          (lambda _
+            (substitute* "Makefile"
+              (("cd pigz/pigz-2.3.3; make") ""))
+            #t))
+         (add-after 'unpack 'patch-paths-to-executables
+          (lambda* (#:key inputs outputs #:allow-other-keys)
+            ;; Hard-code absolute paths: kmc is installed into this
+            ;; package's own output, pigz comes from the "pigz" input.
+            (substitute* "parse_args.cpp"
+              (("kmc_binary = .*")
+               (string-append "kmc_binary = \""
+                              (assoc-ref outputs "out")
+                              "/bin/kmc\";"))
+              (("pigz_binary = .*")
+               (string-append "pigz_binary = \""
+                              (assoc-ref inputs "pigz")
+                              "/bin/pigz\";")))
+            #t))
+         (replace 'install
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
+              (for-each (lambda (file)
+                          (install-file file bin))
+                        '("bless" "kmc/bin/kmc"))
+              #t)))
+         (delete 'configure))))
+    (native-inputs
+     `(("perl" ,perl)))
+    (inputs
+     `(("openmpi" ,openmpi)
+       ("boost" ,boost)
+       ("pigz" ,pigz)
+       ("zlib" ,zlib)))
+    (supported-systems '("x86_64-linux"))
+    (home-page "http://sourceforge.net/p/bless-ec/wiki/Home/")
+    (synopsis "Bloom-filter-based error correction tool for NGS reads")
+    (description
+     "@dfn{Bloom-filter-based error correction solution for high-throughput
+sequencing reads} (BLESS) is a correction tool for genomic reads produced by
+@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
+filter.  BLESS produces accurate correction results with much less memory
+compared with previous solutions and is also able to tolerate a higher
+false-positive rate.  BLESS can extend reads like DNA assemblers to correct
+errors at the end of reads.")
+    (license license:gpl3+)))
+
 (define-public bowtie
   (package
     (name "bowtie")
@@ -254,9 +644,8 @@ into separate processes; and more.")
          'install
          (lambda* (#:key outputs #:allow-other-keys)
            (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
-             (mkdir-p bin)
              (for-each (lambda (file)
-                         (copy-file file (string-append bin file)))
+                         (install-file file bin))
                        (find-files "." "bowtie2.*"))))
          (alist-replace
           'check
@@ -306,12 +695,15 @@ gapped, local, and paired-end alignment modes.")
             (mkdir-p bin)
             (mkdir-p doc)
             (mkdir-p man)
-            (copy-file "bwa" (string-append bin "/bwa"))
-            (copy-file "README.md" (string-append doc "/README.md"))
-            (copy-file "bwa.1" (string-append man "/bwa.1"))))
+            (install-file "bwa" bin)
+            (install-file "README.md" doc)
+            (install-file "bwa.1" man)))
         ;; no "configure" script
         (alist-delete 'configure %standard-phases))))
     (inputs `(("zlib" ,zlib)))
+    ;; Non-portable SSE instructions are used so building fails on platforms
+    ;; other than x86_64.
+    (supported-systems '("x86_64-linux"))
     (home-page "http://bio-bwa.sourceforge.net/")
     (synopsis "Burrows-Wheeler sequence aligner")
     (description
@@ -370,6 +762,7 @@ multiple sequence alignments.")
               (uri (string-append
                     "https://github.com/YeoLab/clipper/archive/"
                     version ".tar.gz"))
+              (file-name (string-append name "-" version ".tar.gz"))
               (sha256
                (base32
                 "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
@@ -399,6 +792,76 @@ multiple sequence alignments.")
      "CLIPper is a tool to define peaks in CLIP-seq datasets.")
     (license license:gpl2)))
 
+;; COUGER: a Python 2 script plus a "src" directory.  Nothing is configured
+;; or compiled; the files are copied into the output and the launcher is
+;; wrapped so that it runs with the build-time PYTHONPATH.
+(define-public couger
+  (package
+    (name "couger")
+    (version "1.8.2")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "http://couger.oit.duke.edu/static/assets/COUGER"
+                    version ".zip"))
+              (sha256
+               (base32
+                "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         (delete 'build)
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((target (assoc-ref outputs "out"))
+                    (bindir (string-append target "/bin")))
+               (copy-recursively "src" (string-append target "/src"))
+               (mkdir bindir)
+               ;; Insert a preamble before the argparse import that appends
+               ;; the output directory (the parent of "src") to sys.path.
+               (substitute* "couger"
+                 (("from argparse")
+                  (string-append "import sys\nsys.path.append(\""
+                                 target "\")\nfrom argparse")))
+               (copy-file "couger" (string-append bindir "/couger")))
+             #t))
+         (add-after 'install 'wrap-program
+           (lambda* (#:key outputs #:allow-other-keys)
+             ;; Prefix PYTHONPATH of the installed launcher with the value
+             ;; PYTHONPATH has in the build environment.
+             (let ((python-path (getenv "PYTHONPATH"))
+                   (launcher    (string-append (assoc-ref outputs "out")
+                                               "/bin/couger")))
+               (wrap-program launcher
+                 `("PYTHONPATH" ":" prefix (,python-path))))
+             #t)))))
+    (inputs
+     `(("python" ,python-2)
+       ("python2-pillow" ,python2-pillow)
+       ("python2-numpy" ,python2-numpy)
+       ("python2-scipy" ,python2-scipy)
+       ("python2-matplotlib" ,python2-matplotlib)))
+    (propagated-inputs
+     `(("r" ,r)
+       ("libsvm" ,libsvm)
+       ("randomjungle" ,randomjungle)))
+    (native-inputs
+     `(("unzip" ,unzip)))
+    (home-page "http://couger.oit.duke.edu")
+    (synopsis "Identify co-factors in sets of genomic regions")
+    (description
+     "COUGER can be applied to any two sets of genomic regions bound by
+paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
+putative co-factors that provide specificity to each TF.  The framework
+determines the genomic targets uniquely-bound by each TF, and identifies a
+small set of co-factors that best explain the in vivo binding differences
+between the two TFs.
+
+COUGER uses classification algorithms (support vector machines and random
+forests) with features that reflect the DNA binding specificities of putative
+co-factors.  The features are generated either from high-throughput TF-DNA
+binding data (from protein binding microarray experiments), or from large
+collections of DNA motifs.")
+    (license license:gpl3+)))
+
 (define-public clustal-omega
   (package
     (name "clustal-omega")
@@ -426,15 +889,16 @@ time.")
 (define-public crossmap
   (package
     (name "crossmap")
-    (version "0.1.6")
+    (version "0.2.1")
     (source (origin
               (method url-fetch)
               (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                   version ".tar.gz"))
               (sha256
                (base32
-                "163hi5gjgij6cndxlvbkp5jjwr0k4wbm9im6d2210278q7k9kpnp"))
-              ;; patch has been sent upstream already
+                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
+              ;; This patch has been sent upstream already and is available
+              ;; for download from Sourceforge, but it has not been merged.
               (patches (list
                         (search-patch "crossmap-allow-system-pysam.patch")))
               (modules '((guix build utils)))
@@ -505,6 +969,315 @@ file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
 other types of unwanted sequence from high-throughput sequencing reads.")
     (license license:expat)))
 
+(define-public deeptools
+  (package
+    (name "deeptools")
+    (version "1.5.11")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://github.com/fidelram/deepTools/archive/"
+                           version ".tar.gz"))
+       ;; The upstream archive is named after the bare version number.
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32 "1kaagygcbvjs9sxd9cqmskd02wcfp9imvb735r087w7hwqpvz6fs"))))
+    (build-system python-build-system)
+    ;; Use the Python 2 package as the interpreter for this build.
+    (arguments `(#:python ,python-2))
+    (native-inputs
+     `(("python-mock" ,python2-mock)    ; test suite only
+       ("python-pytz" ,python2-pytz)    ; test suite only
+       ("python-setuptools" ,python2-setuptools)))
+    (propagated-inputs
+     `(("python-scipy" ,python2-scipy)
+       ("python-numpy" ,python2-numpy)
+       ("python-matplotlib" ,python2-matplotlib)
+       ("python-bx-python" ,python2-bx-python)
+       ("python-pysam" ,python2-pysam)))
+    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
+    (description
+     "DeepTools addresses the challenge of handling the large amounts of data
+that are now routinely generated from DNA sequencing centers.  To do so,
+deepTools contains useful modules to process the mapped reads data to create
+coverage files in standard bedGraph and bigWig file formats.  By doing so,
+deepTools allows the creation of normalized coverage files or the comparison
+between two files (for example, treatment and control).  Finally, using such
+normalized and standardized files, multiple visualizations can be created to
+identify enrichments with functional annotations of the genome.")
+    (home-page "https://github.com/fidelram/deepTools")
+    (license license:gpl3+)))
+
+(define-public diamond
+  (package
+    (name "diamond")
+    (version "0.7.9")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
+                           version ".tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32 "0hfkcfv9f76h5brbyw9fyvmc0l9cmbsxrcdqk0fa9xv82zj47p15"))
+       ;; Remove the "bin/diamond" file shipped in the archive; the 'install'
+       ;; phase below copies the file found at that location after the build.
+       (snippet
+        '(begin (delete-file "bin/diamond") #t))))
+    (build-system gnu-build-system)
+    (arguments
+     '(#:tests? #f                      ; no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         ;; Skip 'configure' and run the remaining phases from "src".
+         (delete 'configure)
+         (add-after 'unpack 'enter-source-dir
+           (lambda _ (chdir "src") #t))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out     (assoc-ref outputs "out"))
+                    (bin-dir (string-append out "/bin")))
+               (mkdir-p bin-dir)
+               ;; The working directory is "src", hence the "../".
+               (copy-file "../bin/diamond"
+                          (string-append bin-dir "/diamond"))
+               #t))))))
+    (inputs
+     `(("boost" ,boost)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("bc" ,bc)))
+    ;; diamond fails to build on other platforms
+    ;; https://github.com/bbuchfink/diamond/issues/18
+    (supported-systems '("x86_64-linux"))
+    (synopsis "Accelerated BLAST compatible local sequence aligner")
+    (description
+     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
+translated DNA query sequences against a protein reference database (BLASTP
+and BLASTX alignment mode).  The speedup over BLAST is up to 20,000 on short
+reads at a typical sensitivity of 90-99% relative to BLAST depending on the
+data and settings.")
+    (home-page "https://github.com/bbuchfink/diamond")
+    (license (license:non-copyleft "file://src/COPYING"
+                                   "See src/COPYING in the distribution."))))
+
+(define-public edirect
+  (package
+    (name "edirect")
+    (version "2.50")
+    (source
+     (origin
+       (method url-fetch)
+       ;; Note: older versions are not retained.
+       (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.zip")
+       (sha256
+        (base32 "08afhz2ph66h8h381hl1mqyxkdi5nbvzsyj9gfw3jfbdijnpi4qj"))))
+    (build-system perl-build-system)
+    (arguments
+     `(#:tests? #f                      ; no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         ;; Only the script is installed; skip 'configure' and 'build'.
+         (delete 'configure)
+         (delete 'build)
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out    (assoc-ref outputs "out"))
+                    (bindir (string-append out "/bin")))
+               (mkdir-p bindir)
+               (copy-file "edirect.pl" (string-append bindir "/edirect.pl"))
+               #t)))
+         (add-after 'install 'wrap-program
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
+             (let ((script   (string-append (assoc-ref outputs "out")
+                                            "/bin/edirect.pl"))
+                   (perl5lib (getenv "PERL5LIB")))
+               (wrap-program script
+                 `("PERL5LIB" ":" prefix (,perl5lib)))))))))
+    (native-inputs
+     `(("unzip" ,unzip)))
+    (inputs
+     `(("perl-html-parser" ,perl-html-parser)
+       ("perl-encode-locale" ,perl-encode-locale)
+       ("perl-file-listing" ,perl-file-listing)
+       ("perl-html-tagset" ,perl-html-tagset)
+       ("perl-html-tree" ,perl-html-tree)
+       ("perl-http-cookies" ,perl-http-cookies)
+       ("perl-http-date" ,perl-http-date)
+       ("perl-http-message" ,perl-http-message)
+       ("perl-http-negotiate" ,perl-http-negotiate)
+       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
+       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
+       ("perl-net-http" ,perl-net-http)
+       ("perl-uri" ,perl-uri)
+       ("perl-www-robotrules" ,perl-www-robotrules)
+       ("perl" ,perl)))
+    (synopsis "Tools for accessing the NCBI's set of databases")
+    (description
+     "Entrez Direct (EDirect) is a method for accessing the National Center
+for Biotechnology Information's (NCBI) set of interconnected
+databases (publication, sequence, structure, gene, variation, expression,
+etc.) from a terminal.  Functions take search terms from command-line
+arguments.  Individual operations are combined to build multi-step queries.
+Record retrieval and formatting normally complete the process.
+
+EDirect also provides an argument-driven function that simplifies the
+extraction of data from document summaries or other results that are returned
+in structured XML format.  This can eliminate the need for writing custom
+software to answer ad hoc questions.")
+    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288")
+    (license license:public-domain)))
+
+(define-public express
+  (package
+    (name "express")
+    (version "1.5.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
+             version "/express-" version "-src.tgz"))
+       (sha256
+        (base32 "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:tests? #f                      ; no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let ((bamtools (assoc-ref inputs "bamtools")))
+               (substitute* "CMakeLists.txt"
+                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
+                  "set(Boost_USE_STATIC_LIBS OFF)")
+                 ;; Use the bamtools input rather than the in-tree paths.
+                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
+                  (string-append bamtools "/include/bamtools")))
+               (substitute* "src/CMakeLists.txt"
+                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
+                  (string-append bamtools "/lib/bamtools")))
+               #t))))))
+    (inputs
+     `(("boost" ,boost)
+       ("bamtools" ,bamtools)
+       ("protobuf" ,protobuf)
+       ("zlib" ,zlib)))
+    (synopsis "Streaming quantification for high-throughput genomic sequencing")
+    (description
+     "eXpress is a streaming tool for quantifying the abundances of a set of
+target sequences from sampled subsequences.  Example applications include
+transcript-level RNA-Seq quantification, allele-specific/haplotype expression
+analysis (from RNA-Seq), transcription factor binding quantification in
+ChIP-Seq, and analysis of metagenomic data.")
+    (home-page "http://bio.math.berkeley.edu/eXpress")
+    (license license:artistic2.0)))
+
+(define-public express-beta-diversity
+  (package
+    (name "express-beta-diversity")
+    (version "1.0.7")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
+             version ".tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         ;; 'build' and 'check' run inside "source"; 'install' runs from the
+         ;; top-level directory again.
+         (add-before 'build 'enter-source (lambda _ (chdir "source") #t))
+         (replace 'check
+           (lambda _
+             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
+         (add-after 'check 'exit-source (lambda _ (chdir "..") #t))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+               (mkdir-p bin)
+               (for-each
+                (lambda (file)
+                  (copy-file file (string-append bin "/" (basename file))))
+                '("scripts/convertToEBD.py" "bin/ExpressBetaDiversity"))
+               #t))))))
+    (inputs
+     `(("python" ,python-2)))
+    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
+    (description
+     "Express Beta Diversity (EBD) calculates ecological beta diversity
+(dissimilarity) measures between biological communities.  EBD implements a
+variety of diversity measures including those that make use of phylogenetic
+similarity of community members.")
+    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
+    (license license:gpl3+)))
+
+(define-public fasttree
+  (package
+    (name "fasttree")
+    (version "2.1.8")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "http://www.microbesonline.org/fasttree/FastTree-"
+             version ".c"))
+       (sha256
+        (base32
+         "0dzqc9vr9iiiw21y159xfjl2z90vw0y7r4x6456pcaxiy5hd2wmi"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f                      ; no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         ;; The source is a single C file (see the URI), which the 'build'
+         ;; phase hands straight to gcc, so 'unpack' and 'configure' are
+         ;; dropped.
+         (delete 'unpack)
+         (delete 'configure)
+         (replace 'build
+           (lambda* (#:key source #:allow-other-keys)
+             ;; Compile SOURCE into PROGRAM.  EXTRA-FLAGS, if any, are
+             ;; placed before the flags common to both programs.
+             (define (build-program program . extra-flags)
+               (zero? (apply system* "gcc"
+                             (append extra-flags
+                                     (list "-O3"
+                                           "-finline-functions"
+                                           "-funroll-loops"
+                                           "-Wall"
+                                           "-o"
+                                           program
+                                           source
+                                           "-lm")))))
+             ;; The second program is only built when the first succeeded.
+             (and (build-program "FastTree")
+                  (build-program "FastTreeMP" "-DOPENMP" "-fopenmp"))))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((bin (string-append (assoc-ref outputs "out")
+                                       "/bin")))
+               (mkdir-p bin)
+               (for-each (lambda (program)
+                           (copy-file program
+                                      (string-append bin "/" program)))
+                         '("FastTree" "FastTreeMP"))
+               #t))))))
+    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
+    (description
+     "FastTree can handle alignments with up to a million of sequences in a
+reasonable amount of time and memory.  For large alignments, FastTree is
+100-1,000 times faster than PhyML 3.0 or RAxML 7.")
+    (home-page "http://www.microbesonline.org/fasttree")
+    (license license:gpl2+)))
+
 (define-public fastx-toolkit
   (package
     (name "fastx-toolkit")
@@ -551,15 +1324,20 @@ results.  The FASTX-Toolkit tools perform some of these preprocessing tasks.")
                 "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
     (build-system cmake-build-system)
     (arguments
-     `(;; There is no test target, although there is a directory containing
-       ;; test data and scripts (launched by flexbar_validate.sh).
-       #:tests? #f
-       #:configure-flags (list
+     `(#:configure-flags (list
                           (string-append "-DFLEXBAR_BINARY_DIR="
                                          (assoc-ref %outputs "out")
                                          "/bin/"))
        #:phases
-       (alist-delete 'install %standard-phases)))
+       (alist-replace
+        'check
+        (lambda* (#:key outputs #:allow-other-keys)
+          (setenv "PATH" (string-append
+                          (assoc-ref outputs "out") "/bin:"
+                          (getenv "PATH")))
+          (chdir "../flexbar_v2.5_src/test")
+          (zero? (system* "bash" "flexbar_validate.sh")))
+        (alist-delete 'install %standard-phases))))
     (inputs
      `(("tbb" ,tbb)
        ("zlib" ,zlib)))
@@ -667,14 +1445,12 @@ estimates transcript expression.")
         (alist-replace
          'install
          (lambda* (#:key outputs #:allow-other-keys)
-           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
-             (mkdir-p bin)
-             (for-each
-              (lambda (file)
-                (copy-file file (string-append bin file)))
-              (find-files
-               "."
-               "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))))
+           ;; Install into "<out>/bin/"; install-file creates the directory.
+           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
+             (for-each (lambda (file) (install-file file bin))
+                       (find-files
+                        "."
+                        "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))))
          (alist-delete 'configure %standard-phases)))))
     (native-inputs
      `(("unzip" ,unzip)))
@@ -682,6 +1458,9 @@ estimates transcript expression.")
      `(("perl" ,perl)
        ("python" ,python)
        ("zlib" ,zlib)))
+    ;; Non-portable SSE instructions are used so building fails on platforms
+    ;; other than x86_64.
+    (supported-systems '("x86_64-linux"))
     (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
     (synopsis "Hierarchical indexing for spliced alignment of transcripts")
     (description
@@ -693,6 +1472,35 @@ several alignment strategies enable effective alignment of RNA-seq reads, in
 particular, reads spanning multiple exons.")
     (license license:gpl3+)))
 
+(define-public hmmer
+  (package
+    (name "hmmer")
+    (version "3.1b2")
+    (source
+     (origin
+       (method url-fetch)
+       ;; The directory name embeds a prefix of the version string.
+       (uri (string-append "http://selab.janelia.org/software/hmmer"
+                           (version-prefix version 1) "/"
+                           version "/hmmer-" version ".tar.gz"))
+       (sha256
+        (base32 "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
+    (build-system gnu-build-system)
+    (native-inputs `(("perl" ,perl)))
+    (synopsis "Biosequence analysis using profile hidden Markov models")
+    (description
+     "HMMER is used for searching sequence databases for homologs of protein
+sequences, and for making protein sequence alignments.  It implements methods
+using probabilistic models called profile hidden Markov models (profile
+HMMs).")
+    (home-page "http://hmmer.janelia.org")
+    (license (list license:gpl3+
+                   ;; The bundled library 'easel' is distributed
+                   ;; under The Janelia Farm Software License.
+                   (license:non-copyleft
+                    "file://easel/LICENSE"
+                    "See easel/LICENSE in the distribution.")))))
+
 (define-public htseq
   (package
     (name "htseq")
@@ -707,9 +1515,11 @@ particular, reads spanning multiple exons.")
                 "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
     (build-system python-build-system)
     (arguments `(#:python ,python-2)) ; only Python 2 is supported
-    (inputs
-     `(("python-numpy" ,python2-numpy)
-       ("python-setuptools" ,python2-setuptools)))
+    ;; Numpy needs to be propagated when htseq is used as a Python library.
+    (propagated-inputs
+     `(("python-numpy" ,python2-numpy)))
+    (native-inputs
+     `(("python-setuptools" ,python2-setuptools)))
     (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
     (synopsis "Analysing high-throughput sequencing data with Python")
     (description
@@ -762,6 +1572,91 @@ sequencing (HTS) data.  There are also an number of useful utilities for
 manipulating HTS data.")
     (license license:expat)))
 
+(define-public htslib
+  (package
+    (name "htslib")
+    (version "1.2.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "https://github.com/samtools/htslib/releases/download/"
+             version "/htslib-" version ".tar.bz2"))
+       (sha256
+        (base32 "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'patch-tests
+           (lambda _
+             ;; Replace the hard-coded /bin/bash in the test driver.
+             (let ((bash (which "bash")))
+               (substitute* "test/test.pl"
+                 (("/bin/bash") bash)))
+             #t)))))
+    (native-inputs
+     `(("perl" ,perl)))
+    (inputs `(("zlib" ,zlib)))
+    (synopsis "C library for reading/writing high-throughput sequencing data")
+    (description
+     "HTSlib is a C library for reading/writing high-throughput sequencing
+data.  It also provides the bgzip, htsfile, and tabix utilities.")
+    (home-page "http://www.htslib.org")
+    ;; Files under cram/ are released under the modified BSD license;
+    ;; the rest is released under the Expat license
+    (license (list license:expat license:bsd-3))))
+
+(define-public idr
+  (package
+    (name "idr")
+    (version "2.0.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://github.com/nboley/idr/archive/"
+                           version ".tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32 "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'install 'wrap-program
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let* ((python (assoc-ref inputs "python"))
+                    ;; Taken from the tail of the python input's file name.
+                    (py-version (string-take (string-take-right python 5) 3))
+                    (site-packages
+                     (lambda (label)
+                       (string-append (assoc-ref inputs label)
+                                      "/lib/python" py-version
+                                      "/site-packages")))
+                    (path (string-join
+                           (map site-packages
+                                '("python-scipy" "python-numpy"
+                                  "python-matplotlib"))
+                           ":")))
+               (wrap-program (string-append (assoc-ref outputs "out")
+                                            "/bin/idr")
+                 `("PYTHONPATH" ":" prefix (,path))))
+             #t)))))
+    (native-inputs
+     `(("python-cython" ,python-cython)
+       ("python-setuptools" ,python-setuptools)))
+    (inputs
+     `(("python-scipy" ,python-scipy)
+       ("python-numpy" ,python-numpy)
+       ("python-matplotlib" ,python-matplotlib)))
+    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
+    (description
+     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
+to measure the reproducibility of findings identified from replicate
+experiments and provide highly stable thresholds based on reproducibility.")
+    (home-page "https://github.com/nboley/idr")
+    (license license:gpl3+)))
+
 (define-public macs
   (package
     (name "macs")
@@ -793,6 +1688,143 @@ resolution of binding sites through combining the information of both
 sequencing tag position and orientation.")
     (license license:bsd-3)))
 
+(define-public mafft
+  (package
+    (name "mafft")
+    (version "7.221")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "http://mafft.cbrc.jp/alignment/software/mafft-" version
+                    "-without-extensions-src.tgz"))
+              (file-name (string-append name "-" version ".tgz"))
+              (sha256
+               (base32
+                "0xi7klbsgi049vsrk6jiwh9wfj3b770gz3c8c7zwij448v0dr73d"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ; no automated tests, though there are tests in the read me
+       #:make-flags (let ((out (assoc-ref %outputs "out")))
+                      (list (string-append "PREFIX=" out)
+                            (string-append "BINDIR=" out "/bin")))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'enter-dir
+          (lambda _ (chdir "core") #t))
+         (add-after 'enter-dir 'patch-makefile
+          (lambda _
+            ;; on advice from the MAFFT authors, there is no need to
+            ;; distribute mafft-profile, mafft-distance, or
+            ;; mafft-homologs.rb as they are too "specialised".
+            (substitute* "Makefile"
+              ;; remove mafft-homologs.rb from SCRIPTS
+              (("^SCRIPTS = mafft mafft-homologs.rb")
+               "SCRIPTS = mafft")
+              ;; remove mafft-distance from PROGS
+              (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
+               "PROGS = dvtditr dndfast7 dndblast sextet5")
+              ;; remove mafft-profile from PROGS; the pattern has to name the
+              ;; neighbouring "f2cl" exactly as the replacement does
+              (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
+               "splittbfast disttbfast tbfast f2cl mccaskillwrap")
+              (("^rm -f mafft-profile mafft-profile.exe") "#")
+              (("^rm -f mafft-distance mafft-distance.exe") ")#")
+              ;; do not install MAN pages in libexec folder
+              (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
+\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
+            #t))
+         (delete 'configure))))
+    (inputs
+     `(("perl" ,perl)))
+    (home-page "http://mafft.cbrc.jp/alignment/software/")
+    (synopsis "Multiple sequence alignment program")
+    (description
+     "MAFFT offers a range of multiple alignment methods for nucleotide and
+protein sequences.  For instance, it offers L-INS-i (accurate; for alignment
+of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
+sequences).")
+    (license (license:non-copyleft
+              "http://mafft.cbrc.jp/alignment/software/license.txt"
+              "BSD-3 with different formatting"))))
+
+(define-public metabat
+  (package
+    (name "metabat")
+    (version "0.26.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://bitbucket.org/berkeleylab/metabat/get/"
+                           version ".tar.bz2"))
+       (file-name (string-append name "-" version ".tar.bz2"))
+       (sha256
+        (base32 "0vgrhbaxg4dkxyax2kbigak7w0arhqvw0szwp6gd9wmyilc44kfa"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'fix-includes
+           (lambda _
+             ;; Look for the bam, sam and kseq headers under "samtools/"
+             ;; instead of "bam/".
+             (substitute* "SConstruct"
+               (("/include/bam/bam.h")
+                "/include/samtools/bam.h"))
+             (substitute* "src/BamUtils.h"
+               (("^#include \"bam/bam\\.h\"")
+                "#include \"samtools/bam.h\"")
+               (("^#include \"bam/sam\\.h\"")
+                "#include \"samtools/sam.h\""))
+             (substitute* "src/KseqReader.h"
+               (("^#include \"bam/kseq\\.h\"")
+                "#include \"samtools/kseq.h\""))
+             #t))
+         (add-after 'unpack 'fix-scons
+           (lambda _
+             ;; Do not distribute README
+             (substitute* "SConstruct"
+               (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)")
+                ""))
+             #t))
+         (delete 'configure)
+         (replace 'build
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (mkdir out)
+               ;; The "install" target is requested here, which is why the
+               ;; separate 'install' phase is deleted below.
+               (zero? (system* "scons"
+                               (string-append "PREFIX=" out)
+                               (string-append "HTSLIB_DIR="
+                                              (assoc-ref inputs "htslib"))
+                               (string-append "SAMTOOLS_DIR="
+                                              (assoc-ref inputs "samtools"))
+                               (string-append "BOOST_ROOT="
+                                              (assoc-ref inputs "boost"))
+                               "install")))))
+         ;; check and install carried out during build phase
+         (delete 'check)
+         (delete 'install))))
+    (native-inputs
+     `(("scons" ,scons)))
+    (inputs
+     `(("zlib" ,zlib)
+       ("perl" ,perl)
+       ("samtools" ,samtools)
+       ("htslib" ,htslib)
+       ("boost" ,boost)))
+    (synopsis
+     "Reconstruction of single genomes from complex microbial communities")
+    (description
+     "Grouping large genomic fragments assembled from shotgun metagenomic
+sequences to deconvolute complex microbial communities, or metagenome binning,
+enables the study of individual organisms and their interactions.  MetaBAT is
+an automated metagenome binning software, which integrates empirical
+probabilistic distances of genome abundance and tetranucleotide frequency.")
+    (home-page "https://bitbucket.org/berkeleylab/metabat")
+    (license (license:non-copyleft "file://license.txt"
+                                   "See license.txt in the distribution."))))
+
 (define-public miso
   (package
     (name "miso")
@@ -800,15 +1832,18 @@ sequencing tag position and orientation.")
     (source (origin
               (method url-fetch)
               (uri (string-append
-                    "http://pypi.python.org/packages/source/m/misopy/misopy-"
+                    "https://pypi.python.org/packages/source/m/misopy/misopy-"
                     version ".tar.gz"))
               (sha256
                (base32
                 "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
               (modules '((guix build utils)))
-              ;; use "gcc" instead of "cc" for compilation
               (snippet
                '(substitute* "setup.py"
+                  ;; Use setuptools, or else the executables are not
+                  ;; installed.
+                  (("distutils.core") "setuptools")
+                  ;; use "gcc" instead of "cc" for compilation
                   (("^defines")
                    "cc.set_executables(
 compiler='gcc',
@@ -826,7 +1861,9 @@ linker_so='gcc -shared'); defines")))))
        ("python-scipy" ,python2-scipy)
        ("python-matplotlib" ,python2-matplotlib)))
     (native-inputs
-     `(("python-setuptools" ,python2-setuptools)))
+     `(("python-mock" ,python2-mock) ;for tests
+       ("python-pytz" ,python2-pytz) ;for tests
+       ("python-setuptools" ,python2-setuptools)))
     (home-page "http://genes.mit.edu/burgelab/miso/index.html")
     (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
     (description
@@ -838,14 +1875,35 @@ RNA-Seq, the MISO model uses Bayesian inference to compute the probability
 that a read originated from a particular isoform.")
     (license license:gpl2)))
 
-(define-public python2-pbcore
+(define-public orfm
   (package
-    (name "python2-pbcore")
-    (version "0.9.3")
+    (name "orfm")
+    (version "0.4.1")
     (source (origin
               (method url-fetch)
               (uri (string-append
-                    "https://github.com/PacificBiosciences/pbcore/archive/"
+                    "https://github.com/wwood/OrfM/releases/download/v"
+                    version "/orfm-" version ".tar.gz"))
+              (sha256
+               (base32
+                "05fmw145snk646ly076zby0fjav0k7ysbclck5d4s9pmgcfpijc2"))))
+    (build-system gnu-build-system)
+    (inputs `(("zlib" ,zlib)))
+    (synopsis "Simple and not slow open reading frame (ORF) caller")
+    (description
+     "An ORF caller finds stretches of DNA that when translated are not
+interrupted by stop codons.  OrfM finds and prints these ORFs.")
+    (home-page "https://github.com/wwood/OrfM")
+    (license license:lgpl3+)))
+
+(define-public python2-pbcore
+  (package
+    (name "python2-pbcore")
+    (version "0.9.3")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://github.com/PacificBiosciences/pbcore/archive/"
                     version ".tar.gz"))
               (file-name (string-append name "-" version ".tar.gz"))
               (sha256
@@ -867,20 +1925,73 @@ that a read originated from a particular isoform.")
 files and writing bioinformatics applications.")
     (license license:bsd-3)))
 
+(define-public python2-warpedlmm
+  (package
+    (name "python2-warpedlmm")
+    (version "0.21")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "https://pypi.python.org/packages/source/W/WarpedLMM/WarpedLMM-"
+             version ".zip"))
+       (sha256
+        (base32
+         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:python ,python-2  ; requires Python 2.7
+       #:phases
+       (modify-phases %standard-phases
+         ;; The installed "bin" directory holds nothing but wrappers for
+         ;; running the module tests.  Nothing needs them once the "check"
+         ;; phase is over, so the whole directory is removed from the
+         ;; output.
+         (add-after 'install 'remove-bin-directory
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+              (delete-file-recursively bin)
+              #t))))))
+    (propagated-inputs
+     `(("python-scipy" ,python2-scipy)
+       ("python-numpy" ,python2-numpy)
+       ("python-matplotlib" ,python2-matplotlib)
+       ("python-fastlmm" ,python2-fastlmm)
+       ("python-pandas" ,python2-pandas)
+       ("python-pysnptools" ,python2-pysnptools)))
+    (native-inputs
+     `(("python-setuptools" ,python2-setuptools)
+       ("python-mock" ,python2-mock)
+       ("python-nose" ,python2-nose)
+       ("unzip" ,unzip)))               ; the source is a ".zip" archive
+    (home-page "https://github.com/PMBio/warpedLMM")
+    (synopsis "Implementation of warped linear mixed models")
+    (description
+     "WarpedLMM is a Python implementation of the warped linear mixed model,
+which automatically learns an optimal warping function (or transformation) for
+the phenotype as it models the data.")
+    (license license:asl2.0)))
+
 (define-public pbtranscript-tofu
-  (let ((commit "c7bbd5472"))
+  (let ((commit "8f5467fe6"))
     (package
       (name "pbtranscript-tofu")
-      (version (string-append "0.4.1." commit))
+      (version (string-append "2.2.3." commit))
       (source (origin
                 (method git-fetch)
                 (uri (git-reference
                       (url "https://github.com/PacificBiosciences/cDNA_primer.git")
                       (commit commit)))
-                (file-name (string-append name "-" version ".tar.gz"))
+                (file-name (string-append name "-" version "-checkout"))
                 (sha256
                  (base32
-                  "148xkzi689c49g6fdhckp6mnmj2qhjdf1j4wifm6ja7ij95d7fxx"))))
+                  "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
+                (modules '((guix build utils)))
+                (snippet
+                 '(begin
+                    ;; remove bundled Cython sources
+                    (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
+                    #t))))
       (build-system python-build-system)
       (arguments
        `(#:python ,python-2
@@ -890,34 +2001,29 @@ files and writing bioinformatics applications.")
          #:configure-flags '("--single-version-externally-managed"
                              "--record=pbtranscript-tofu.txt")
          #:phases
-         (alist-cons-after
-          'unpack 'enter-directory-and-clean-up
-          (lambda _
-            (chdir "pbtranscript-tofu/pbtranscript/")
-            ;; Delete clutter
-            (delete-file-recursively "dist/")
-            (delete-file-recursively "build/")
-            (delete-file-recursively "setuptools_cython-0.2.1-py2.6.egg/")
-            (delete-file-recursively "pbtools.pbtranscript.egg-info")
-            (delete-file "Cython-0.20.1.tar.gz")
-            (delete-file "setuptools_cython-0.2.1-py2.7.egg")
-            (delete-file "setuptools_cython-0.2.1.tar.gz")
-            (delete-file "setup.cfg")
-            (for-each delete-file
-                      (find-files "." "\\.so$"))
-            ;; files should be writable for install phase
-            (for-each (lambda (f) (chmod f #o755))
-                      (find-files "." "\\.py$")))
-          %standard-phases)))
+         (modify-phases %standard-phases
+           (add-after 'unpack 'enter-directory
+            (lambda _
+              (chdir "pbtranscript-tofu/pbtranscript/")
+              #t))
+           ;; With setuptools version 18.0 and later this setup.py hack causes
+           ;; a build error, so we disable it.
+           (add-after 'enter-directory 'patch-setuppy
+            (lambda _
+              (substitute* "setup.py"
+                (("if 'setuptools.extension' in sys.modules:")
+                 "if False:"))
+              #t)))))
       (inputs
-       `(("python-cython" ,python2-cython)
-         ("python-numpy" ,python2-numpy)
+       `(("python-numpy" ,python2-numpy)
          ("python-bx-python" ,python2-bx-python)
          ("python-networkx" ,python2-networkx)
          ("python-scipy" ,python2-scipy)
-         ("python-pbcore" ,python2-pbcore)))
+         ("python-pbcore" ,python2-pbcore)
+         ("python-h5py" ,python2-h5py)))
       (native-inputs
-       `(("python-nose" ,python2-nose)
+       `(("python-cython" ,python2-cython)
+         ("python-nose" ,python2-nose)
          ("python-setuptools" ,python2-setuptools)))
       (home-page "https://github.com/PacificBiosciences/cDNA_primer")
       (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
@@ -926,6 +2032,115 @@ files and writing bioinformatics applications.")
 generated using the PacBio Iso-Seq protocol.")
       (license license:bsd-3))))
 
+(define-public prodigal
+  (package
+    (name "prodigal")
+    (version "2.6.2")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://github.com/hyattpd/Prodigal/archive/v"
+                    version ".tar.gz"))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "0m8sb0fg6lmxrlpzna0am6svbnlmd3dckrhgzxxgb3gxr5fyj284"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f                      ; there is no "check" target
+       ;; Point INSTALLDIR at the "bin" directory of the output.
+       #:make-flags (let ((out (assoc-ref %outputs "out")))
+                      (list (string-append "INSTALLDIR=" out "/bin")))
+       ;; The 'configure' phase is skipped altogether.
+       #:phases (modify-phases %standard-phases
+                  (delete 'configure))))
+    (home-page "http://prodigal.ornl.gov")
+    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
+    (description
+     "Prodigal runs smoothly on finished genomes, draft genomes, and
+metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
+format.  It runs quickly, in an unsupervised fashion, handles gaps, handles
+partial genes, and identifies translation initiation sites.")
+    (license license:gpl3+)))
+
+(define-public rsem
+  (package
+    (name "rsem")
+    (version "1.2.20")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append "http://deweylab.biostat.wisc.edu/rsem/src/rsem-"
+                       version ".tar.gz"))
+       (sha256
+        (base32 "0nzdc0j0hjllhsd5f2xli95dafm3nawskigs140xzvjk67xh0r9q"))
+       (patches (list (search-patch "rsem-makefile.patch")))
+       (modules '((guix build utils)))
+       (snippet
+        '(begin
+           ;; remove bundled copy of boost
+           (delete-file-recursively "boost")
+           #t))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ;no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         ;; No "configure" script.
+         ;; Do not build bundled samtools library.
+         (replace 'configure
+                  (lambda _
+                    (substitute* "Makefile"
+                      (("^all : sam/libbam.a") "all : "))
+                    #t))
+         ;; Install by hand.  Every file whose name matches "rsem-.*" is
+         ;; copied to "bin"; the Perl helper module goes to
+         ;; "lib/perl5/site_perl", the directory that the 'wrap-program
+         ;; phase below prepends to PERL5LIB.
+         (replace 'install
+                  (lambda* (#:key outputs #:allow-other-keys)
+                    (let* ((out  (assoc-ref outputs "out"))
+                           (bin  (string-append out "/bin/"))
+                           (perl (string-append out "/lib/perl5/site_perl")))
+                      ;; 'install-file' creates the target directory itself.
+                      (for-each (lambda (file) (install-file file bin))
+                                (find-files "." "rsem-.*"))
+                      (install-file "rsem_perl_utils.pm" perl))
+                    #t))
+         (add-after
+          'install 'wrap-program
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let ((out (assoc-ref outputs "out")))
+              (for-each (lambda (prog)
+                          (wrap-program (string-append out "/bin/" prog)
+                            `("PERL5LIB" ":" prefix
+                              (,(string-append out "/lib/perl5/site_perl")))))
+                        '("rsem-plot-transcript-wiggles"
+                          "rsem-calculate-expression"
+                          "rsem-generate-ngvector"
+                          "rsem-run-ebseq"
+                          "rsem-prepare-reference")))
+            #t)))))
+    (inputs
+     `(("boost" ,boost)
+       ("ncurses" ,ncurses)
+       ("r" ,r)
+       ("perl" ,perl)
+       ("samtools" ,samtools-0.1)
+       ("zlib" ,zlib)))
+    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
+    (synopsis "Estimate gene expression levels from RNA-Seq data")
+    (description
+     "RSEM is a software package for estimating gene and isoform expression
+levels from RNA-Seq data.  The RSEM package provides a user-friendly
+interface, supports threads for parallel computation of the EM algorithm,
+single-end and paired-end read data, quality scores, variable-length reads and
+RSPD estimation.  In addition, it provides posterior mean and 95% credibility
+interval estimates for expression levels.  For visualization, it can generate
+BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
+    (license license:gpl3+)))
+
 (define-public rseqc
   (package
     (name "rseqc")
@@ -972,7 +2187,7 @@ distribution, coverage uniformity, strand specificity, etc.")
 (define-public samtools
   (package
     (name "samtools")
-    (version "1.1")
+    (version "1.2")
     (source
      (origin
        (method url-fetch)
@@ -981,7 +2196,7 @@ distribution, coverage uniformity, strand specificity, etc.")
                        version "/samtools-" version ".tar.bz2"))
        (sha256
         (base32
-         "1y5p2hs4gif891b4ik20275a8xf3qrr1zh9wpysp4g8m0g1jckf2"))))
+         "1akdqb685pk9xk1nb6sa9aq8xssjjhvvc06kp4cpdqvz2157l3j2"))))
     (build-system gnu-build-system)
     (arguments
      `(;; There are 87 test failures when building on non-64-bit architectures
@@ -991,32 +2206,43 @@ distribution, coverage uniformity, strand specificity, etc.")
        ;; systems.
        #:tests? ,(string=? (or (%current-system) (%current-target-system))
                            "x86_64-linux")
-       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
+       #:modules ((ice-9 ftw)
+                  (ice-9 regex)
+                  (guix build gnu-build-system)
+                  (guix build utils))
+       #:make-flags (list "LIBCURSES=-lncurses"
+                          (string-append "prefix=" (assoc-ref %outputs "out")))
        #:phases
        (alist-cons-after
         'unpack
-        'patch-makefile-curses
-        (lambda _
-          (substitute* "Makefile"
-            (("-lcurses") "-lncurses")))
+        'patch-tests
+        (lambda* (#:key inputs #:allow-other-keys)
+          (let ((bash (assoc-ref inputs "bash")))
+            (substitute* "test/test.pl"
+              ;; The test script calls out to /bin/bash
+              (("/bin/bash")
+               (string-append bash "/bin/bash"))
+              ;; There are two failing tests upstream relating to the "stats"
+              ;; subcommand in test_usage_subcommand ("did not have Usage"
+              ;; and "usage did not mention samtools stats"), so we disable
+              ;; them.
+              (("(test_usage_subcommand\\(.*\\);)" cmd)
+               (string-append "unless ($subcommand eq 'stats') {" cmd "};")))))
         (alist-cons-after
-         'unpack
-         'patch-tests
-         (lambda* (#:key inputs #:allow-other-keys)
-           (let ((bash (assoc-ref inputs "bash")))
-             (substitute* "test/test.pl"
-               ;; The test script calls out to /bin/bash
-               (("/bin/bash")
-                (string-append bash "/bin/bash"))
-               ;; There are two failing tests upstream relating to the "stats"
-               ;; subcommand in test_usage_subcommand ("did not have Usage"
-               ;; and "usage did not mention samtools stats"), so we disable
-               ;; them.
-               (("(test_usage_subcommand\\(.*\\);)" cmd)
-                (string-append "unless ($subcommand eq 'stats') {" cmd "};")))))
-         (alist-delete
-          'configure
-          %standard-phases)))))
+         'install 'install-library
+         (lambda* (#:key outputs #:allow-other-keys)
+           (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
+             (install-file "libbam.a" lib)))
+         (alist-cons-after
+          'install 'install-headers
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let ((include (string-append (assoc-ref outputs "out")
+                                          "/include/samtools/")))
+              (for-each (lambda (file)
+                          (install-file file include))
+                        (scandir "." (lambda (name) (string-match "\\.h$" name))))
+              #t))
+          (alist-delete 'configure %standard-phases))))))
     (native-inputs `(("pkg-config" ,pkg-config)))
     (inputs `(("ncurses" ,ncurses)
               ("perl" ,perl)
@@ -1031,10 +2257,85 @@ variant calling (in conjunction with bcftools), and a simple alignment
 viewer.")
     (license license:expat)))
 
+(define-public samtools-0.1
+  ;; This is the most recent version of the 0.1 line of samtools.  Its input
+  ;; and output formats differ greatly from those of samtools 1.x, and it is
+  ;; still used in many bioinformatics pipelines.
+  (package (inherit samtools)
+    (version "0.1.19")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append "mirror://sourceforge/samtools/"
+                       version "/samtools-" version ".tar.bz2"))
+       (sha256
+        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
+    (arguments
+     (substitute-keyword-arguments (package-arguments samtools)
+       ((#:tests? tests) #f) ;no "check" target
+       ((#:phases phases)
+        `(modify-phases ,phases
+           ;; This phase installs only the "samtools" executable.
+           (replace 'install
+                    (lambda* (#:key outputs #:allow-other-keys)
+                      (let ((bin (string-append
+                                  (assoc-ref outputs "out") "/bin")))
+                        (install-file "samtools" bin)
+                        #t)))
+           (delete 'patch-tests)))))))
+
+(define-public mosaik
+  (let ((commit "5c25216d"))
+    (package
+      (name "mosaik")
+      (version "2.2.30")
+      (source (origin
+                ;; There are no release tarballs nor tags.
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/wanpinglee/MOSAIK.git")
+                      (commit commit)))
+                (file-name (string-append name "-" version "-checkout"))
+                (sha256
+                 (base32
+                  "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:tests? #f ; no tests
+         #:make-flags (list "CC=gcc")
+         #:phases
+         (modify-phases %standard-phases
+           (replace 'configure          ; nothing is configured; just enter "src"
+                    (lambda _ (chdir "src") #t))
+           (replace 'install
+                    (lambda* (#:key outputs #:allow-other-keys)
+                      (let ((bin (string-append (assoc-ref outputs "out")
+                                                "/bin")))
+                        (mkdir-p bin)
+                        (copy-recursively "../bin" bin) ; relative to "src"
+                        #t))))))
+      (inputs
+       `(("perl" ,perl)
+         ("zlib" ,zlib)))
+      (supported-systems '("x86_64-linux"))
+      (home-page "https://code.google.com/p/mosaik-aligner/")
+      (synopsis "Map nucleotide sequence reads to reference genomes")
+      (description
+       "MOSAIK is a program for mapping second and third-generation sequencing
+reads to a reference genome.  MOSAIK can align reads generated by all the
+major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
+Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
+      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
+      ;; code released into the public domain:
+      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
+      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
+      (license (list license:gpl2+ license:public-domain)))))
+
 (define-public ngs-sdk
   (package
     (name "ngs-sdk")
-    (version "1.1.0")
+    (version "1.2.2")
     (source
      (origin
        (method url-fetch)
@@ -1044,7 +2345,7 @@ viewer.")
        (file-name (string-append name "-" version ".tar.gz"))
        (sha256
         (base32
-         "09fakv9w87lfg9g70kwzmnryqdjj1sz2c7kw01i6drjf787gkjhw"))))
+         "0rvq61zfw2h9jcz6a33b9xrl20r7s5a9rldvv6rs2qy42khpmf5j"))))
     (build-system gnu-build-system)
     (arguments
      `(#:parallel-build? #f ; not supported
@@ -1054,20 +2355,6 @@ viewer.")
         'configure
         (lambda* (#:key outputs #:allow-other-keys)
           (let ((out (assoc-ref outputs "out")))
-            ;; Only replace the version suffix, not the version number in the
-            ;; directory name; fixed in commit 46d4509fa8 (no release yet).
-            (substitute* "setup/konfigure.perl"
-              (((string-append "\\$\\(subst "
-                               "(\\$\\(VERSION[^\\)]*\\)),"
-                               "(\\$\\([^\\)]+\\)),"
-                               "(\\$\\([^\\)]+\\)|\\$\\@)"
-                               "\\)")
-                _ pattern replacement target)
-               (string-append "$(patsubst "
-                              "%" pattern ","
-                              "%" replacement ","
-                              target ")")))
-
             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (zero? (system* "./configure"
@@ -1078,6 +2365,10 @@ viewer.")
          (lambda _ (chdir "ngs-sdk") #t)
          %standard-phases))))
     (native-inputs `(("perl" ,perl)))
+    ;; According to the test
+    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
+    ;; in ngs-sdk/setup/konfigure.perl
+    (supported-systems '("i686-linux" "x86_64-linux"))
     (home-page "https://github.com/ncbi/ngs")
     (synopsis "API for accessing Next Generation Sequencing data")
     (description
@@ -1098,31 +2389,282 @@ simultaneously.")
                           (srfi srfi-26))
                          ,@(package-arguments ngs-sdk))
            ((#:phases phases)
-            `(alist-cons-after
-              'enter-dir 'fix-java-symlink-installation
-              (lambda _
-                ;; Only replace the version suffix, not the version number in
-                ;; the directory name.  Reported here:
-                ;; https://github.com/ncbi/ngs/pull/4
-                (substitute* "Makefile.java"
-                  (((string-append "\\$\\(subst "
-                                   "(\\$\\(VERSION[^\\)]*\\)),"
-                                   "(\\$\\([^\\)]+\\)),"
-                                   "(\\$\\([^\\)]+\\)|\\$\\@)"
-                                   "\\)")
-                    _ pattern replacement target)
-                   (string-append "$(patsubst "
-                                  "%" pattern ","
-                                  "%" replacement ","
-                                  target ")"))))
-              (alist-replace
-               'enter-dir (lambda _ (chdir "ngs-java") #t)
-               ,phases))))))
+            `(modify-phases ,phases
+               (replace 'enter-dir (lambda _ (chdir "ngs-java") #t)))))))
     (inputs
      `(("jdk" ,icedtea6 "jdk")
        ("ngs-sdk" ,ngs-sdk)))
     (synopsis "Java bindings for NGS SDK")))
 
+(define-public ncbi-vdb
+  (package
+    (name "ncbi-vdb")
+    (version "2.5.4")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append "https://github.com/ncbi/ncbi-vdb/archive/"
+                       version ".tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32
+         "1rcnyc4xkdfcjww2i0s0qrbapys0cxbjcx2sy3qkpslf9f400fgj"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:parallel-build? #f ; not supported
+       #:tests? #f ; no "check" target
+       #:phases
+       (alist-replace
+        'configure
+        (lambda* (#:key inputs outputs #:allow-other-keys)
+          (let ((out (assoc-ref outputs "out")))
+            ;; Override include path for libmagic
+            (substitute* "setup/package.prl"
+              (("name => 'magic', Include => '/usr/include'")
+               (string-append "name=> 'magic', Include => '"
+                              (assoc-ref inputs "libmagic")
+                              "/include" "'")))
+
+            ;; Install kdf5 library (needed by sra-tools)
+            (substitute* "build/Makefile.install"
+              (("LIBRARIES_TO_INSTALL =")
+               "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))
+
+            ;; The 'configure' script doesn't recognize things like
+            ;; '--enable-fast-install'.
+            (zero? (system*
+                    "./configure"
+                    (string-append "--build-prefix=" (getcwd) "/build") ; read by 'install-interfaces
+                    (string-append "--prefix=" out)
+                    "--debug"
+                    (string-append "--with-xml2-prefix="
+                                   (assoc-ref inputs "libxml2"))
+                    (string-append "--with-ngs-sdk-prefix="
+                                   (assoc-ref inputs "ngs-sdk"))
+                    (string-append "--with-ngs-java-prefix="
+                                   (assoc-ref inputs "ngs-java"))
+                    (string-append "--with-hdf5-prefix="
+                                   (assoc-ref inputs "hdf5"))))))
+        (alist-cons-after
+         'install 'install-interfaces
+         (lambda* (#:key outputs #:allow-other-keys)
+           (let* ((out  (assoc-ref outputs "out"))
+                  (ilib (string-append out "/ilib")))
+             ;; Install interface libraries.  On i686 the interface libraries
+             ;; are installed to "linux/gcc/i386", so we need to use the Linux
+             ;; architecture name ("i386") instead of the target system prefix
+             ;; ("i686").
+             (mkdir ilib)
+             (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
+                                              ,(system->linux-architecture
+                                                (or (%current-target-system)
+                                                    (%current-system)))
+                                              "/rel/ilib")
+                               ilib)
+             ;; Install interface headers
+             (copy-recursively "interfaces" (string-append out "/include"))
+             #t))
+         %standard-phases))))
+    (inputs
+     `(("libxml2" ,libxml2)
+       ("ngs-sdk" ,ngs-sdk)
+       ("ngs-java" ,ngs-java)
+       ("libmagic" ,file)
+       ("hdf5" ,hdf5)))
+    (native-inputs `(("perl" ,perl)))
+    (home-page "https://github.com/ncbi/ncbi-vdb")
+    (synopsis "Database engine for genetic information")
+    (description
+     "The NCBI-VDB library implements a highly compressed columnar data
+warehousing engine that is most often used to store genetic information.
+Databases are stored in a portable image within the file system, and can be
+accessed/downloaded on demand across HTTP.")
+    (license license:public-domain)))
+
+(define-public plink
+  (package
+    (name "plink")
+    (version "1.07")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
+             version "-src.zip"))
+       (sha256
+        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
+       (patches (list (search-patch "plink-1.07-unclobber-i.patch")))))
+    (build-system gnu-build-system)
+    (arguments
+     '(#:tests? #f                      ; there is no "check" target
+       #:make-flags
+       (let ((lapack (assoc-ref %build-inputs "lapack")))
+         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
+               "WITH_LAPACK=1"
+               "FORCE_DYNAMIC=1"
+               ;; An empty value disables phoning home.
+               "WITH_WEBCHECK="))
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure)            ; there is no "configure" script
+         ;; Copy the "plink" executable into the output's "bin" directory.
+         (replace 'install
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let* ((out (assoc-ref outputs "out"))
+                   (bin (string-append out "/bin/")))
+              (install-file "plink" bin)
+              #t))))))
+    (inputs
+     `(("zlib" ,zlib)
+       ("lapack" ,lapack)))
+    (native-inputs
+     `(("unzip" ,unzip)))
+    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
+    (synopsis "Whole genome association analysis toolset")
+    (description
+     "PLINK is a whole genome association analysis toolset, designed to
+perform a range of basic, large-scale analyses in a computationally efficient
+manner.  The focus of PLINK is purely on analysis of genotype/phenotype data,
+so there is no support for steps prior to this (e.g. study design and
+planning, generating genotype or CNV calls from raw data).  Through
+integration with gPLINK and Haploview, there is some support for the
+subsequent visualization, annotation and storage of results.")
+    ;; Code is released under GPLv2, except for fisher.h, which is under
+    ;; LGPLv2.1+
+    (license (list license:gpl2 license:lgpl2.1+))))
+
+(define-public preseq
+  (package
+    (name "preseq")
+    (version "1.0.2")
+    (source (origin
+              (method url-fetch)
+              (uri
+               (string-append "http://smithlabresearch.org/downloads/preseq-"
+                              version ".tar.bz2"))
+              (sha256
+               (base32 "0r7sw07p6nv8ygvc17gd78lisbw5336v3vhs86b5wv8mw3pwqksc"))
+              (patches (list (search-patch "preseq-1.0.2-install-to-PREFIX.patch")
+                             (search-patch "preseq-1.0.2-link-with-libbam.patch")))
+              (modules '((guix build utils)))
+              (snippet
+               ;; Remove bundled samtools.
+               '(delete-file-recursively "preseq-master/samtools"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ;no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'enter-dir
+          (lambda _
+            (chdir "preseq-master")
+            #t))
+         ;; The samtools input installs its headers under "include/samtools/".
+         (add-after 'enter-dir 'use-samtools-headers
+          (lambda _
+            (substitute* '("smithlab_cpp/SAM.cpp"
+                           "smithlab_cpp/SAM.hpp")
+              ;; Escape the dot so that only a literal "sam.h" is rewritten.
+              (("sam\\.h") "samtools/sam.h"))
+            #t))
+         (delete 'configure))
+       #:make-flags (list (string-append "PREFIX="
+                                         (assoc-ref %outputs "out"))
+                          (string-append "LIBBAM="
+                                         (assoc-ref %build-inputs "samtools")
+                                         "/lib/libbam.a"))))
+    (inputs
+     `(("gsl" ,gsl)
+       ("samtools" ,samtools-0.1)
+       ("zlib" ,zlib)))
+    (home-page "http://smithlabresearch.org/software/preseq/")
+    (synopsis "Program for analyzing library complexity")
+    (description
+     "The preseq package is aimed at predicting and estimating the complexity
+of a genomic sequencing library, equivalent to predicting and estimating the
+number of redundant reads from a given sequencing depth and how many will be
+expected from additional sequencing using an initial sequencing experiment.
+The estimates can then be used to examine the utility of further sequencing,
+optimize the sequencing depth, or to screen multiple libraries to avoid low
+complexity samples.")
+    (license license:gpl3+)))
+
+(define-public sra-tools
+  (package
+    (name "sra-tools")
+    (version "2.4.5-5")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append "https://github.com/ncbi/sra-tools/archive/"
+                       version ".tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32
+         "11nrnvz7a012f4iryf0wiwrid0h111grsfxbxa9j51h3f2xbvgns"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:parallel-build? #f ; not supported
+       #:tests? #f ; no "check" target
+       #:phases
+       (modify-phases %standard-phases
+         (replace 'configure
+          (lambda* (#:key inputs outputs #:allow-other-keys)
+            ;; The build system expects a directory containing the sources
+            ;; and raw build output of ncbi-vdb, including files that are
+            ;; not installed.  Since we are building against an installed
+            ;; version of ncbi-vdb, the following modifications are needed.
+            (substitute* "setup/konfigure.perl"
+              ;; Make the configure script look for the "ilib" directory of
+              ;; "ncbi-vdb" without first checking for the existence of a
+              ;; matching library in its "lib" directory.
+              (("^            my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
+               "my $f = File::Spec->catdir($ilibdir, $ilib);")
+              ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
+              (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
+               "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))
+
+            ;; The 'configure' script doesn't recognize things like
+            ;; '--enable-fast-install'.
+            (let ((out (assoc-ref outputs "out"))
+                  (vdb (assoc-ref inputs "ncbi-vdb")))
+              (zero? (system*
+                      "./configure"
+                      (string-append "--build-prefix=" (getcwd) "/build")
+                      (string-append "--prefix=" out)
+                      "--debug"
+                      (string-append "--with-fuse-prefix="
+                                     (assoc-ref inputs "fuse"))
+                      (string-append "--with-magic-prefix="
+                                     (assoc-ref inputs "libmagic"))
+                      ;; TODO: building with libxml2 fails with linker errors
+                      ;; (string-append "--with-xml2-prefix="
+                      ;;                (assoc-ref inputs "libxml2"))
+                      ;; The ncbi-vdb input is both "sources" and "build".
+                      (string-append "--with-ncbi-vdb-sources=" vdb)
+                      (string-append "--with-ncbi-vdb-build=" vdb)
+                      (string-append "--with-ngs-sdk-prefix="
+                                     (assoc-ref inputs "ngs-sdk"))
+                      (string-append "--with-hdf5-prefix="
+                                     (assoc-ref inputs "hdf5"))))))))))
+    (native-inputs `(("perl" ,perl)))
+    (inputs
+     `(("ngs-sdk" ,ngs-sdk)
+       ("ncbi-vdb" ,ncbi-vdb)
+       ("libmagic" ,file)
+       ("fuse" ,fuse)
+       ("hdf5" ,hdf5)
+       ("zlib" ,zlib)))
+    (home-page "http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
+    (synopsis "Tools and libraries for reading and writing sequencing data")
+    (description
+     "The SRA Toolkit from NCBI is a collection of tools and libraries for
+reading of sequencing files from the Sequence Read Archive (SRA) database and
+writing files into the .sra format.")
+    (license license:public-domain)))
+
 (define-public seqan
   (package
     (name "seqan")
@@ -1166,10 +2708,48 @@ manipulation, online and indexed string search, efficient I/O of
 bioinformatics file formats, sequence alignment, and more.")
     (license license:bsd-3)))
 
+;; Package definition for seqmagick, fetched from PyPI and built with
+;; Python 2.
+(define-public seqmagick
+  (package
+    (name "seqmagick")
+    (version "0.6.1")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://pypi.python.org/packages/source/s/seqmagick/seqmagick-"
+                    version ".tar.gz"))
+              (sha256
+               (base32
+                "0cgn477n74gsl4qdaakrrhi953kcsd4q3ivk2lr18x74s3g4ma1d"))))
+    (build-system python-build-system)
+    (arguments
+     ;; Upstream supports Python 2 only; see
+     ;; https://github.com/fhcrc/seqmagick/issues/56
+     `(#:python ,python-2
+       #:phases
+       (modify-phases %standard-phases
+         ;; The test command in setup.py is broken as of 0.6.1 (see
+         ;; https://github.com/fhcrc/seqmagick/issues/55), so the 'check
+         ;; phase runs nose directly for now.
+         (replace 'check
+           (lambda _
+             (zero? (system* "nosetests")))))))
+    (native-inputs
+     `(("python-setuptools" ,python2-setuptools)
+       ("python-nose" ,python2-nose)))
+    (inputs
+     `(("python-biopython" ,python2-biopython)))
+    (home-page "http://github.com/fhcrc/seqmagick")
+    (synopsis "Tools for converting and modifying sequence files")
+    (description
+     "Bioinformaticians often have to convert sequence files between formats
+and do little manipulations on them, and it's not worth writing scripts for
+that.  Seqmagick is a utility to expose the file format conversion in
+BioPython in a convenient way.  Instead of having a big mess of scripts, there
+is one that takes arguments.")
+    (license license:gpl3)))
+
 (define-public star
   (package
     (name "star")
-    (version "2.4.0j")
+    (version "2.4.2a")
     (source (origin
               (method url-fetch)
               (uri (string-append
@@ -1177,7 +2757,7 @@ bioinformatics file formats, sequence alignment, and more.")
                     version ".tar.gz"))
               (sha256
                (base32
-                "1y3bciych1aw6s7k8sy1saj23dcan9wk4d4f96an499slkxwz712"))
+                "1c3rnm7r5l0kl3d04gl1g7938xqf1c2l0mla87rlplqg1hcns5mc"))
               (modules '((guix build utils)))
               (snippet
                '(substitute* "source/Makefile"
@@ -1193,8 +2773,7 @@ bioinformatics file formats, sequence alignment, and more.")
          'install
          (lambda* (#:key outputs #:allow-other-keys)
            (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
-             (mkdir-p bin)
-             (copy-file "STAR" (string-append bin "STAR"))))
+             (install-file "STAR" bin)))
          (alist-delete
           'configure %standard-phases)))))
     (native-inputs
@@ -1214,109 +2793,60 @@ sequences.")
     ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
     (license license:gpl3+)))
 
-(define-public shogun
+(define-public subread
   (package
-    (name "shogun")
-    (version "4.0.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (string-append
-             "ftp://shogun-toolbox.org/shogun/releases/"
-             (version-major+minor version)
-             "/sources/shogun-" version ".tar.bz2"))
-       (sha256
-        (base32
-         "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb"))))
-    (build-system cmake-build-system)
+    (name "subread")
+    (version "1.4.6-p2")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "mirror://sourceforge/subread/subread-"
+                    version "-source.tar.gz"))
+              (sha256
+               (base32
+                "06sv9mpcsdj6p68y15d6gi70lca3lxmzk0dn61hg0kfsa7rxmsr3"))))
+    (build-system gnu-build-system)
     (arguments
-     '(#:tests? #f ;no check target
+     `(#:tests? #f ;no "check" target
+      ;; By default the CC and CCFLAGS variables contain a lot of x86_64
+      ;; optimizations, so we override them: the x86_64-only flags (flags64)
+      ;; are appended only when the build target is an x86_64 system.
+       #:make-flags
+       (list (let ((system ,(or (%current-target-system)
+                                (%current-system)))
+                   (flags '("-ggdb" "-fomit-frame-pointer"
+                            "-ffast-math" "-funroll-loops"
+                            "-fmessage-length=0"
+                            "-O9" "-Wall" "-DMAKE_FOR_EXON"
+                            "-DMAKE_STANDALONE"
+                            "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
+                   (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
+               (if (string-prefix? "x86_64" system)
+                   (string-append "CCFLAGS=" (string-join (append flags flags64)))
+                   (string-append "CCFLAGS=" (string-join flags))))
+             "-f" "Makefile.Linux"
+             "CC=gcc ${CCFLAGS}")
        #:phases
        (alist-cons-after
-        'unpack 'delete-broken-symlinks
-        (lambda _
-          (for-each delete-file '("applications/arts/data"
-                                  "applications/asp/data"
-                                  "applications/easysvm/data"
-                                  "applications/msplicer/data"
-                                  "applications/ocr/data"
-                                  "examples/documented/data"
-                                  "examples/documented/matlab_static"
-                                  "examples/documented/octave_static"
-                                  "examples/undocumented/data"
-                                  "examples/undocumented/matlab_static"
-                                  "examples/undocumented/octave_static"
-                                  "tests/integration/data"
-                                  "tests/integration/matlab_static"
-                                  "tests/integration/octave_static"
-                                  "tests/integration/python_modular/tests"))
-          #t)
-        (alist-cons-after
-         'unpack 'change-R-target-path
+        'unpack 'enter-dir
+        (lambda _ (chdir "src") #t)
+        (alist-replace
+         'install
          (lambda* (#:key outputs #:allow-other-keys)
-           (substitute* '("src/interfaces/r_modular/CMakeLists.txt"
-                          "src/interfaces/r_static/CMakeLists.txt"
-                          "examples/undocumented/r_modular/CMakeLists.txt")
-             (("\\$\\{R_COMPONENT_LIB_PATH\\}")
-              (string-append (assoc-ref outputs "out")
-                             "/lib/R/library/")))
-           #t)
-         (alist-cons-after
-          'unpack 'fix-octave-modules
-          (lambda* (#:key outputs #:allow-other-keys)
-            (substitute* '("src/interfaces/octave_modular/CMakeLists.txt"
-                           "src/interfaces/octave_static/CMakeLists.txt")
-              (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}")
-               "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave"))
-
-            ;; change target directory
-            (substitute* "src/interfaces/octave_modular/CMakeLists.txt"
-              (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}")
-               (string-append (assoc-ref outputs "out")
-                              "/share/octave/packages")))
-            #t)
-          (alist-cons-before
-           'build 'set-HOME
-           ;; $HOME needs to be set at some point during the build phase
-           (lambda _ (setenv "HOME" "/tmp") #t)
-           %standard-phases))))
-       #:configure-flags
-       (list "-DUSE_SVMLIGHT=OFF" ;disable proprietary SVMLIGHT
-             ;;"-DJavaModular=ON" ;requires unpackaged jblas
-             ;;"-DRubyModular=ON" ;requires unpackaged ruby-narray
-             ;;"-DPerlModular=ON" ;"FindPerlLibs" does not exist
-             ;;"-DLuaModular=ON"  ;fails because lua doesn't build pkgconfig file
-             "-DOctaveModular=ON"
-             "-DOctaveStatic=ON"
-             "-DPythonModular=ON"
-             "-DPythonStatic=ON"
-             "-DRModular=ON"
-             "-DRStatic=ON"
-             "-DCmdLineStatic=ON")))
-    (inputs
-     `(("python" ,python)
-       ("numpy" ,python-numpy)
-       ("r" ,r)
-       ("octave" ,octave)
-       ("swig" ,swig)
-       ("hdf5" ,hdf5)
-       ("atlas" ,atlas)
-       ("arpack" ,arpack-ng)
-       ("lapack" ,lapack)
-       ("glpk" ,glpk)
-       ("libxml2" ,libxml2)
-       ("lzo" ,lzo)
-       ("zlib" ,zlib)))
-    (native-inputs
-     `(("pkg-config" ,pkg-config)))
-    (home-page "http://shogun-toolbox.org/")
-    (synopsis "Machine learning toolbox")
+           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
+             (mkdir-p bin)
+             (copy-recursively "../bin" bin)))
+         ;; There is no "configure" script, so that phase is dropped.
+         (alist-delete 'configure %standard-phases)))))
+    (inputs `(("zlib" ,zlib)))
+    (home-page "http://bioinf.wehi.edu.au/subread-package/")
+    (synopsis "Tool kit for processing next-gen sequencing data")
     (description
-     "The Shogun Machine learning toolbox provides a wide range of unified and
-efficient Machine Learning (ML) methods.  The toolbox seamlessly allows to
-combine multiple data representations, algorithm classes, and general purpose
-tools.  This enables both rapid prototyping of data pipelines and extensibility
-in terms of new algorithms.")
+     "The subread package contains the following tools: subread aligner, a
+general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
+and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
+features; exactSNP: a SNP caller that discovers SNPs by testing signals
+against local background noises.")
     (license license:gpl3+)))
 
 (define-public vcftools
@@ -1335,6 +2865,7 @@ in terms of new algorithms.")
     (arguments
      `(#:tests? #f ; no "check" target
        #:make-flags (list
+                     "CFLAGS=-O2" ; override "-m64" flag
                      (string-append "PREFIX=" (assoc-ref %outputs "out"))
                      (string-append "MANDIR=" (assoc-ref %outputs "out")
                                     "/share/man/man1"))
@@ -1358,3 +2889,179 @@ data in the form of VCF files.")
     ;; The license is declared as LGPLv3 in the README and
     ;; at http://vcftools.sourceforge.net/license.html
     (license license:lgpl3)))
+
+;; Package definition for VSEARCH, built from the GitHub release tarball
+;; after the bundled cityhash copy has been stripped from the source.
+(define-public vsearch
+  (package
+    (name "vsearch")
+    (version "1.4.1")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://github.com/torognes/vsearch/archive/v"
+                    version ".tar.gz"))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "0b1359wbzgb2cm04h7dq05v80vik88hnsv298xxd1q1f2q4ydni7"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin
+                  ;; Drop the bundled cityhash and the '-mtune=native' flag:
+                  ;; the Makefile.am entries that reference the bundled
+                  ;; sources are rewritten and -lcityhash is used instead.
+                  (substitute* "src/Makefile.am"
+                    (("^AM_CXXFLAGS=-I\\$\\{srcdir\\}/cityhash \
+-O3 -mtune=native -Wall -Wsign-compare")
+                     (string-append "AM_CXXFLAGS=-lcityhash"
+                                    " -O3 -Wall -Wsign-compare"))
+                    (("^__top_builddir__bin_vsearch_SOURCES = cityhash/city.h \\\\")
+                     "__top_builddir__bin_vsearch_SOURCES = \\")
+                    (("^cityhash/config.h \\\\") "\\")
+                    (("^cityhash/city.cc \\\\") "\\"))
+                  ;; Include the header by its non-bundled name.
+                  (substitute* "src/vsearch.h"
+                    (("^\\#include \"cityhash/city.h\"")
+                     "#include <city.h>"))
+                  (delete-file-recursively "src/cityhash")
+                  #t))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         ;; Run autoreconf before the 'configure phase.
+         (add-before 'configure 'autogen
+           (lambda _
+             (zero? (system* "autoreconf" "-vif")))))))
+    (native-inputs
+     `(("autoconf" ,autoconf)
+       ("automake" ,automake)))
+    (inputs
+     `(("zlib" ,zlib)
+       ("bzip2" ,bzip2)
+       ("cityhash" ,cityhash)))
+    (home-page "https://github.com/torognes/vsearch")
+    (synopsis "Sequence search tools for metagenomics")
+    (description
+     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
+dereplication, pairwise alignment, shuffling, subsampling, sorting and
+masking.  The tool takes advantage of parallelism in the form of SIMD
+vectorization as well as multiple threads to perform accurate alignments at
+high speed.  VSEARCH uses an optimal global aligner (full dynamic programming
+Needleman-Wunsch).")
+    ;; Building fails on other platforms because vsearch uses non-portable
+    ;; SSE intrinsics.
+    (supported-systems '("x86_64-linux"))
+    ;; Dual licensed; also includes public domain source.
+    (license (list license:gpl3 license:bsd-2))))
+
+;; Package definition for the bio-locus gem, fetched from rubygems.
+(define-public bio-locus
+  (package
+    (name "bio-locus")
+    (version "0.0.7")
+    (source (origin
+              (method url-fetch)
+              (uri (rubygems-uri "bio-locus" version))
+              (sha256
+               (base32
+                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
+    (build-system ruby-build-system)
+    ;; RSpec is only a build-time (native) input.
+    (native-inputs
+     `(("ruby-rspec" ,ruby-rspec)))
+    (home-page "https://github.com/pjotrp/bio-locus")
+    (synopsis "Tool for fast querying of genome locations")
+    (description
+     "Bio-locus is a tabix-like tool for fast querying of genome
+locations.  Many file formats in bioinformatics contain records that
+start with a chromosome name and a position for a SNP, or a start-end
+position for indels.  Bio-locus allows users to store this chr+pos or
+chr+pos+alt information in a database.")
+    (license license:expat)))
+
+;; Package definition for the bio-blastxmlparser gem, fetched from rubygems.
+(define-public bio-blastxmlparser
+  (package
+    (name "bio-blastxmlparser")
+    (version "2.0.4")
+    (source (origin
+              (method url-fetch)
+              (uri (rubygems-uri "bio-blastxmlparser" version))
+              (sha256
+               (base32
+                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
+    (build-system ruby-build-system)
+    ;; Gems propagated to users of this package.
+    (propagated-inputs
+     `(("ruby-bio-logger" ,ruby-bio-logger)
+       ("ruby-nokogiri" ,ruby-nokogiri)))
+    ;; RSpec is declared as a native (build-time) input, matching how the
+    ;; bio-locus package in this file declares it.
+    (native-inputs
+     `(("ruby-rspec" ,ruby-rspec)))
+    (synopsis "Fast big data BLAST XML parser and library")
+    (description
+     "Very fast parallel big-data BLAST XML file parser which can be used as
+command line utility.  Use blastxmlparser to: Parse BLAST XML; filter output;
+generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
+    (home-page "http://github.com/pjotrp/blastxmlparser")
+    (license license:expat)))
+
+;; Package definition for BioRuby (the "bio" gem), fetched from rubygems.
+(define-public bioruby
+  (package
+    (name "bioruby")
+    (version "1.5.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (rubygems-uri "bio" version))
+       (sha256
+        (base32
+         "01k2fyjl5fpx4zn8g6gqiqvsg2j1fgixrs9p03vzxckynxdq3wmc"))))
+    (build-system ruby-build-system)
+    (propagated-inputs
+     `(("ruby-libxml" ,ruby-libxml)))
+    (native-inputs
+     `(("which" ,which)))  ; required for test phase
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-before 'build 'patch-test-command
+          (lambda _
+            ;; Replace the hard-coded command names in the functional test
+            ;; with the absolute file names found via 'which'.  The clauses
+            ;; are applied in order, exactly as the separate passes were.
+            (substitute* "test/functional/bio/test_command.rb"
+              (("/bin/sh") (which "sh"))
+              (("/bin/ls") (which "ls"))
+              (("which") (which "which")))
+            ;; "/bin/echo" also appears in a helper script.  Note: the
+            ;; elements of this list must not be separated by a comma; in
+            ;; Scheme a comma is 'unquote', not a separator.
+            (substitute* '("test/functional/bio/test_command.rb"
+                           "test/data/command/echoarg2.sh")
+              (("/bin/echo") (which "echo")))
+            #t)))))
+    (synopsis "Ruby library, shell and utilities for bioinformatics")
+    (description "BioRuby comes with a comprehensive set of Ruby development
+tools and libraries for bioinformatics and molecular biology.  BioRuby has
+components for sequence analysis, pathway analysis, protein modelling and
+phylogenetic analysis; it supports many widely used data formats and provides
+easy access to databases, external programs and public web services, including
+BLAST, KEGG, GenBank, MEDLINE and GO.")
+    (home-page "http://bioruby.org/")
+    ;; Code is released under Ruby license, except for setup
+    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
+    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
+
+;; Package definition for the R/qtl library, fetched from the CRAN mirror.
+(define-public r-qtl
+  (package
+    (name "r-qtl")
+    (version "1.37-11")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "mirror://cran/src/contrib/qtl_"
+                                  version ".tar.gz"))
+              (sha256
+               (base32
+                "0h20d36mww7ljp51pfs66xq33yq4b4fwq9nsh02dpmfhlaxgx1xi"))))
+    (build-system r-build-system)
+    (home-page "http://rqtl.org/")
+    (synopsis "R package for analyzing QTL experiments in genetics")
+    (description "R/qtl is an extension library for the R statistics
+system.  It is used to analyze experimental crosses for identifying
+genes contributing to variation in quantitative traits (so-called
+quantitative trait loci, QTLs).
+
+Using a hidden Markov model, R/qtl allows to estimate genetic maps, to
+identify genotyping errors, and to perform single-QTL and two-QTL,
+two-dimensional genome scans.")
+    (license license:gpl3)))