gnu: r-bsgenome-celegans-ucsc-ce6: Move to (gnu packages bioconductor).
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
index b9c1ecb..efe4c9b 100644 (file)
@@ -84,6 +84,7 @@
   #:use-module (gnu packages jemalloc)
   #:use-module (gnu packages dlang)
   #:use-module (gnu packages linux)
+  #:use-module (gnu packages lisp)
   #:use-module (gnu packages logging)
   #:use-module (gnu packages machine-learning)
   #:use-module (gnu packages man)
@@ -1384,6 +1385,46 @@ gapped, local, and paired-end alignment modes.")
     (supported-systems '("x86_64-linux"))
     (license license:gpl3+)))
 
+(define-public bowtie1
+  ;; Bowtie 1.x, built with the upstream Makefile from the SourceForge source
+  ;; archive; the configure phase is not used.
+  (package
+    (name "bowtie1")
+    (version "1.2.2")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
+                                  version "/bowtie-" version "-src.zip"))
+              (sha256
+               (base32
+                "1jl2cj9bz8lwz8dwnxbycn8yp8g4kky62fkcxifyf1ri0y6n2vc0"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin
+                  ;; Replace BUILD_HOST and BUILD_TIME with constants for a
+                  ;; deterministic build.
+                  (substitute* "Makefile"
+                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
+                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
+                  ;; End the snippet with an explicit true value, as the
+                  ;; other snippets in this file do.
+                  #t))))
+    (build-system gnu-build-system)
+    (arguments
+     '(#:tests? #f                      ; no "check" target
+       ;; Build the "all" target and hand the output directory to make as
+       ;; "prefix".
+       #:make-flags
+       (list "all"
+             (string-append "prefix=" (assoc-ref %outputs "out")))
+       #:phases
+       (modify-phases %standard-phases
+         ;; The configure phase is skipped entirely.
+         (delete 'configure))))
+    (inputs
+     `(("tbb" ,tbb)
+       ("zlib" ,zlib)))
+    ;; Only x86_64-linux is declared as supported.
+    (supported-systems '("x86_64-linux"))
+    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
+    (synopsis "Fast aligner for short nucleotide sequence reads")
+    (description
+     "Bowtie is a fast, memory-efficient short read aligner.  It aligns short
+DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
+reads per hour.  Bowtie indexes the genome with a Burrows-Wheeler index to
+keep its memory footprint small: typically about 2.2 GB for the human
+genome (2.9 GB for paired-end).")
+    (license license:artistic2.0)))
+
 (define-public tophat
   (package
     (name "tophat")
@@ -6240,6 +6281,48 @@ sequence.")
     (supported-systems '("i686-linux" "x86_64-linux"))
     (license license:bsd-3)))
 
+(define-public r-scde
+  ;; SCDE is fetched from the upstream Git repository at the revision named
+  ;; by the version string and built with the R build system.
+  (package
+    (name "r-scde")
+    (version "1.99.2")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/hms-dbmi/scde.git")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
+    (build-system r-build-system)
+    ;; Propagated, so installing r-scde also installs these R packages.
+    (propagated-inputs
+     `(("r-rcpp" ,r-rcpp)
+       ("r-rcpparmadillo" ,r-rcpparmadillo)
+       ("r-mgcv" ,r-mgcv)
+       ("r-rook" ,r-rook)
+       ("r-rjson" ,r-rjson)
+       ("r-cairo" ,r-cairo)
+       ("r-rcolorbrewer" ,r-rcolorbrewer)
+       ("r-edger" ,r-edger)
+       ("r-quantreg" ,r-quantreg)
+       ("r-nnet" ,r-nnet)
+       ("r-rmtstat" ,r-rmtstat)
+       ("r-extremes" ,r-extremes)
+       ("r-pcamethods" ,r-pcamethods)
+       ("r-biocparallel" ,r-biocparallel)
+       ("r-flexmix" ,r-flexmix)))
+    (home-page "https://hms-dbmi.github.io/scde/")
+    (synopsis "R package for analyzing single-cell RNA-seq data")
+    (description
+     "The SCDE package implements a set of statistical methods for
+analyzing single-cell RNA-seq data.  SCDE fits individual error models for
+single-cell RNA-seq measurements.  These models can then be used for
+assessment of differential expression between groups of cells, as well as
+other types of analysis.  The SCDE package also contains the pagoda framework
+which applies pathway and gene set overdispersion analysis to identify aspects
+of transcriptional heterogeneity among single cells.")
+    ;; The license is discussed upstream at
+    ;; https://github.com/hms-dbmi/scde/issues/38
+    (license license:gpl2)))
+
 (define-public r-centipede
   (package
     (name "r-centipede")
@@ -6264,54 +6347,6 @@ between two different types of motif instances using as much relevant
 information as possible.")
     (license (list license:gpl2+ license:gpl3+))))
 
-(define-public r-copynumber
-  (package
-    (name "r-copynumber")
-    (version "1.22.0")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "copynumber" version))
-              (sha256
-               (base32
-                "0ipwj9i5p1bwhg5d80jdjagm02krpj2v0j47qdgw41h8wncdyal3"))))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-s4vectors" ,r-s4vectors)
-       ("r-iranges" ,r-iranges)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-biocgenerics" ,r-biocgenerics)))
-    (home-page "https://bioconductor.org/packages/copynumber")
-    (synopsis "Segmentation of single- and multi-track copy number data")
-    (description
-     "This package segments single- and multi-track copy number data by a
-penalized least squares regression method.")
-    (license license:artistic2.0)))
-
-(define-public r-geneplotter
-  (package
-    (name "r-geneplotter")
-    (version "1.60.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "geneplotter" version))
-       (sha256
-        (base32
-         "10khr0pznxf3m0f5gzck9ymljrwcv3vamfmpskd51yjh36lhllqz"))))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotate" ,r-annotate)
-       ("r-annotationdbi" ,r-annotationdbi)
-       ("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-lattice" ,r-lattice)
-       ("r-rcolorbrewer" ,r-rcolorbrewer)))
-    (home-page "https://bioconductor.org/packages/geneplotter")
-    (synopsis "Graphics functions for genomic data")
-    (description
-     "This package provides functions for plotting genomic data.")
-    (license license:artistic2.0)))
-
 (define-public r-genefilter
   (package
     (name "r-genefilter")
@@ -6674,38 +6709,6 @@ annotation infrastructure.")
      "This package provides a pipeline for the analysis of GRO-seq data.")
     (license license:gpl3+)))
 
-(define-public r-txdb-hsapiens-ucsc-hg19-knowngene
-  (package
-    (name "r-txdb-hsapiens-ucsc-hg19-knowngene")
-    (version "3.2.2")
-    (source (origin
-              (method url-fetch)
-              ;; We cannot use bioconductor-uri here because this tarball is
-              ;; located under "data/annotation/" instead of "bioc/".
-              (uri (string-append "https://bioconductor.org/packages/"
-                                  "release/data/annotation/src/contrib"
-                                  "/TxDb.Hsapiens.UCSC.hg19.knownGene_"
-                                  version ".tar.gz"))
-              (sha256
-               (base32
-                "1sajhcqqwazgz2lqbik7rd935i7kpnh08zxbp2ra10j72yqy4g86"))))
-    (properties
-     `((upstream-name . "TxDb.Hsapiens.UCSC.hg19.knownGene")))
-    (build-system r-build-system)
-    ;; As this package provides little more than a very large data file it
-    ;; doesn't make sense to build substitutes.
-    (arguments `(#:substitutable? #f))
-    (propagated-inputs
-     `(("r-genomicfeatures" ,r-genomicfeatures)))
-    (home-page
-     "https://bioconductor.org/packages/TxDb.Hsapiens.UCSC.hg19.knownGene/")
-    (synopsis "Annotation package for human genome in TxDb format")
-    (description
-     "This package provides an annotation database of Homo sapiens genome
-data.  It is derived from the UCSC hg19 genome and based on the \"knownGene\"
-track.  The database is exposed as a @code{TxDb} object.")
-    (license license:artistic2.0)))
-
 (define-public r-sparql
   (package
   (name "r-sparql")
@@ -7127,28 +7130,6 @@ checks on R packages that are to be submitted to the Bioconductor repository.")
 that accept short and long options.")
     (license license:gpl2+)))
 
-(define-public r-dnacopy
-  (package
-    (name "r-dnacopy")
-    (version "1.56.0")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "DNAcopy" version))
-              (sha256
-               (base32
-                "04cqdqxhva66xwh1s2vffi56b9fcrqd4slcrvqasj5lp2rkjli82"))))
-    (properties
-     `((upstream-name . "DNAcopy")))
-    (build-system r-build-system)
-    (inputs
-     `(("gfortran" ,gfortran)))
-    (home-page "https://bioconductor.org/packages/DNAcopy")
-    (synopsis "Implementation of a circular binary segmentation algorithm")
-    (description "This package implements the circular binary segmentation (CBS)
-algorithm to segment DNA copy number data and identify genomic regions with
-abnormal copy number.")
-    (license license:gpl2+)))
-
 (define-public r-s4vectors
   (package
     (name "r-s4vectors")
@@ -7878,37 +7859,6 @@ dependencies between GO terms can be implemented and applied.")
 genome data packages and support for efficient SNP representation.")
     (license license:artistic2.0)))
 
-(define-public r-bsgenome-hsapiens-1000genomes-hs37d5
-  (package
-    (name "r-bsgenome-hsapiens-1000genomes-hs37d5")
-    (version "0.99.1")
-    (source (origin
-              (method url-fetch)
-              ;; We cannot use bioconductor-uri here because this tarball is
-              ;; located under "data/annotation/" instead of "bioc/".
-              (uri (string-append "https://www.bioconductor.org/packages/"
-                                  "release/data/annotation/src/contrib/"
-                                  "BSgenome.Hsapiens.1000genomes.hs37d5_"
-                                  version ".tar.gz"))
-              (sha256
-               (base32
-                "1cg0g5fqmsvwyw2p9hp2yy4ilk21jkbbrnpgqvb5c36ihjwvc7sr"))))
-    (properties
-     `((upstream-name . "BSgenome.Hsapiens.1000genomes.hs37d5")))
-    (build-system r-build-system)
-    ;; As this package provides little more than a very large data file it
-    ;; doesn't make sense to build substitutes.
-    (arguments `(#:substitutable? #f))
-    (propagated-inputs
-     `(("r-bsgenome" ,r-bsgenome)))
-    (home-page
-     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.1000genomes.hs37d5/")
-    (synopsis "Full genome sequences for Homo sapiens")
-    (description
-     "This package provides full genome sequences for Homo sapiens from
-1000genomes phase2 reference genome sequence (hs37d5), based on NCBI GRCh37.")
-    (license license:artistic2.0)))
-
 (define-public r-impute
   (package
     (name "r-impute")
@@ -8155,100 +8105,6 @@ plots the corresponding sequence logo as introduced by Schneider and
 Stephens (1990).")
     (license license:lgpl2.0+)))
 
-(define-public r-bsgenome-hsapiens-ucsc-hg19
-  (package
-    (name "r-bsgenome-hsapiens-ucsc-hg19")
-    (version "1.4.0")
-    (source (origin
-              (method url-fetch)
-              ;; We cannot use bioconductor-uri here because this tarball is
-              ;; located under "data/annotation/" instead of "bioc/".
-              (uri (string-append "https://www.bioconductor.org/packages/"
-                                  "release/data/annotation/src/contrib/"
-                                  "BSgenome.Hsapiens.UCSC.hg19_"
-                                  version ".tar.gz"))
-              (sha256
-               (base32
-                "1y0nqpk8cw5a34sd9hmin3z4v7iqm6hf6l22cl81vlbxqbjibxc8"))))
-    (properties
-     `((upstream-name . "BSgenome.Hsapiens.UCSC.hg19")))
-    (build-system r-build-system)
-    ;; As this package provides little more than a very large data file it
-    ;; doesn't make sense to build substitutes.
-    (arguments `(#:substitutable? #f))
-    (propagated-inputs
-     `(("r-bsgenome" ,r-bsgenome)))
-    (home-page
-     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg19/")
-    (synopsis "Full genome sequences for Homo sapiens")
-    (description
-     "This package provides full genome sequences for Homo sapiens as provided
-by UCSC (hg19, February 2009) and stored in Biostrings objects.")
-    (license license:artistic2.0)))
-
-(define-public r-bsgenome-mmusculus-ucsc-mm9
-  (package
-    (name "r-bsgenome-mmusculus-ucsc-mm9")
-    (version "1.4.0")
-    (source (origin
-              (method url-fetch)
-              ;; We cannot use bioconductor-uri here because this tarball is
-              ;; located under "data/annotation/" instead of "bioc/".
-              (uri (string-append "https://www.bioconductor.org/packages/"
-                                  "release/data/annotation/src/contrib/"
-                                  "BSgenome.Mmusculus.UCSC.mm9_"
-                                  version ".tar.gz"))
-              (sha256
-               (base32
-                "1birqw30g2azimxpnjfzmkphan7x131yy8b9h85lfz5fjdg7841i"))))
-    (properties
-     `((upstream-name . "BSgenome.Mmusculus.UCSC.mm9")))
-    (build-system r-build-system)
-    ;; As this package provides little more than a very large data file it
-    ;; doesn't make sense to build substitutes.
-    (arguments `(#:substitutable? #f))
-    (propagated-inputs
-     `(("r-bsgenome" ,r-bsgenome)))
-    (home-page
-     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm9/")
-    (synopsis "Full genome sequences for Mouse")
-    (description
-     "This package provides full genome sequences for Mus musculus (Mouse) as
-provided by UCSC (mm9, July 2007) and stored in Biostrings objects.")
-    (license license:artistic2.0)))
-
-(define-public r-bsgenome-mmusculus-ucsc-mm10
-  (package
-    (name "r-bsgenome-mmusculus-ucsc-mm10")
-    (version "1.4.0")
-    (source (origin
-              (method url-fetch)
-              ;; We cannot use bioconductor-uri here because this tarball is
-              ;; located under "data/annotation/" instead of "bioc/".
-              (uri (string-append "https://www.bioconductor.org/packages/"
-                                  "release/data/annotation/src/contrib/"
-                                  "BSgenome.Mmusculus.UCSC.mm10_"
-                                  version ".tar.gz"))
-              (sha256
-               (base32
-                "12s0nm2na9brjad4rn9l7d3db2aj8qa1xvz0y1k7gk08wayb6bkf"))))
-    (properties
-     `((upstream-name . "BSgenome.Mmusculus.UCSC.mm10")))
-    (build-system r-build-system)
-    ;; As this package provides little more than a very large data file it
-    ;; doesn't make sense to build substitutes.
-    (arguments `(#:substitutable? #f))
-    (propagated-inputs
-     `(("r-bsgenome" ,r-bsgenome)))
-    (home-page
-     "https://www.bioconductor.org/packages/BSgenome.Mmusculus.UCSC.mm10/")
-    (synopsis "Full genome sequences for Mouse")
-    (description
-     "This package provides full genome sequences for Mus
-musculus (Mouse) as provided by UCSC (mm10, December 2011) and stored
-in Biostrings objects.")
-    (license license:artistic2.0)))
-
 (define-public r-txdb-mmusculus-ucsc-mm10-knowngene
   (package
     (name "r-txdb-mmusculus-ucsc-mm10-knowngene")
@@ -8284,38 +8140,6 @@ the TxDb object of Mouse data as provided by UCSC (mm10, December 2011)
 based on the knownGene track.")
     (license license:artistic2.0)))
 
-(define-public r-bsgenome-celegans-ucsc-ce6
-  (package
-    (name "r-bsgenome-celegans-ucsc-ce6")
-    (version "1.4.0")
-    (source (origin
-              (method url-fetch)
-              ;; We cannot use bioconductor-uri here because this tarball is
-              ;; located under "data/annotation/" instead of "bioc/".
-              (uri (string-append "https://www.bioconductor.org/packages/"
-                                  "release/data/annotation/src/contrib/"
-                                  "BSgenome.Celegans.UCSC.ce6_"
-                                  version ".tar.gz"))
-              (sha256
-               (base32
-                "0mqzb353xv2c3m3vkb315dkmnxkgczp7ndnknyhpgjlybyf715v9"))))
-    (properties
-     `((upstream-name . "BSgenome.Celegans.UCSC.ce6")))
-    (build-system r-build-system)
-    ;; As this package provides little more than a very large data file it
-    ;; doesn't make sense to build substitutes.
-    (arguments `(#:substitutable? #f))
-    (propagated-inputs
-     `(("r-bsgenome" ,r-bsgenome)))
-    (home-page
-     "https://www.bioconductor.org/packages/BSgenome.Celegans.UCSC.ce6/")
-    (synopsis "Full genome sequences for Worm")
-    (description
-     "This package provides full genome sequences for Caenorhabditis
-elegans (Worm) as provided by UCSC (ce6, May 2008) and stored in Biostrings
-objects.")
-    (license license:artistic2.0)))
-
 (define-public r-bsgenome-celegans-ucsc-ce10
   (package
     (name "r-bsgenome-celegans-ucsc-ce10")
@@ -11613,7 +11437,7 @@ Browser.")
 (define-public bismark
   (package
     (name "bismark")
-    (version "0.19.1")
+    (version "0.20.1")
     (source
      (origin
        (method git-fetch)
@@ -11623,18 +11447,25 @@ Browser.")
        (file-name (string-append name "-" version "-checkout"))
        (sha256
         (base32
-         "0yb5l36slwg02fp4b1jdlplgljcsxgqfzvzihzdnphd87dghcc84"))
-       (snippet
-        '(begin
-           ;; highcharts.js is non-free software.  The code is available under
-           ;; CC-BY-NC or proprietary licenses only.
-           (delete-file "bismark_sitrep/highcharts.js")
-           #t))))
+         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
     (build-system perl-build-system)
     (arguments
      `(#:tests? #f                      ; there are no tests
+       #:modules ((guix build utils)
+                  (ice-9 popen)
+                  (srfi srfi-26)
+                  (guix build perl-build-system))
        #:phases
        (modify-phases %standard-phases
+         ;; The bundled plotly.js is minified.
+         (add-after 'unpack 'replace-plotly.js
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let* ((file (assoc-ref inputs "plotly.js"))
+                    (installed "plotly/plotly.js"))
+               (let ((minified (open-pipe* OPEN_READ "uglify-js" file)))
+                 (call-with-output-file installed
+                   (cut dump-port minified <>))))
+             #t))
          (delete 'configure)
          (delete 'build)
          (replace 'install
@@ -11653,10 +11484,11 @@ Browser.")
                                "deduplicate_bismark"
                                "filter_non_conversion"
                                "bam2nuc"
-                               "bismark2summary")))
+                               "bismark2summary"
+                               "NOMe_filtering")))
                (substitute* "bismark2report"
-                 (("\\$RealBin/bismark_sitrep")
-                  (string-append share "/bismark_sitrep")))
+                 (("\\$RealBin/plotly")
+                  (string-append share "/plotly")))
                (mkdir-p share)
                (mkdir-p docdir)
                (mkdir-p bin)
@@ -11665,8 +11497,8 @@ Browser.")
                (for-each (lambda (file) (install-file file docdir))
                          docs)
                (copy-recursively "Docs/Images" (string-append docdir "/Images"))
-               (copy-recursively "bismark_sitrep"
-                                 (string-append share "/bismark_sitrep"))
+               (copy-recursively "plotly"
+                                 (string-append share "/plotly"))
 
                ;; Fix references to gunzip
                (substitute* (map (lambda (file)
@@ -11677,7 +11509,18 @@ Browser.")
                                  "/bin/gunzip -c")))
                #t))))))
     (inputs
-     `(("gzip" ,gzip)))
+     `(("gzip" ,gzip)
+       ("perl-carp" ,perl-carp)
+       ("perl-getopt-long" ,perl-getopt-long)))
+    (native-inputs
+     `(("plotly.js"
+        ,(origin
+           (method url-fetch)
+           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
+                               "v1.39.4/dist/plotly.js"))
+           (sha256
+            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
+       ("uglify-js" ,uglify-js)))
     (home-page "http://www.bioinformatics.babraham.ac.uk/projects/bismark/")
     (synopsis "Map bisulfite treated sequence reads and analyze methylation")
     (description "Bismark is a program to map bisulfite treated sequencing
@@ -12902,7 +12745,7 @@ expression report comparing samples in an easily configurable manner.")
 (define-public pigx-chipseq
   (package
     (name "pigx-chipseq")
-    (version "0.0.21")
+    (version "0.0.31")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
@@ -12910,7 +12753,7 @@ expression report comparing samples in an easily configurable manner.")
                                   "/pigx_chipseq-" version ".tar.gz"))
               (sha256
                (base32
-                "0psgdzlnx5xwhlhpss5yvmnl7yv19y9742l97m04f7awd8w74gxs"))))
+                "0l3vd9xwqzap3mmyj8xwqp84kj7scbq308diqnwg2albphl75xqs"))))
     (build-system gnu-build-system)
     ;; parts of the tests rely on access to the network
     (arguments '(#:tests? #f))
@@ -13135,6 +12978,38 @@ descriptive settings file.  The result is a set of comprehensive, interactive
 HTML reports with interesting findings about your samples.")
     (license license:gpl3+)))
 
+(define-public genrich
+  ;; Genrich is built from the upstream Git revision "v" + VERSION using the
+  ;; project's Makefile; the single resulting executable is installed by hand.
+  (package
+    (name "genrich")
+    (version "0.5")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/jsh58/Genrich.git")
+                    (commit (string-append "v" version))))
+              ;; Give the checkout a descriptive store name, as the other
+              ;; git-fetch origins in this file do.
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ; there are none
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         ;; Replace the install phase: copy the "Genrich" executable into
+         ;; the output's bin directory.
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin")))
+               (install-file "Genrich" bin)
+               #t))))))
+    (inputs
+     `(("zlib" ,zlib)))
+    (home-page "https://github.com/jsh58/Genrich")
+    (synopsis "Detecting sites of genomic enrichment")
+    (description "Genrich is a peak-caller for genomic enrichment
+assays (e.g. ChIP-seq, ATAC-seq).  It analyzes alignment files generated
+following the assay and produces a file detailing peaks of significant
+enrichment.")
+    (license license:expat)))
+
 (define-public mantis
   (let ((commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84")
         (revision "1"))
@@ -14425,3 +14300,61 @@ datasets.  A popular implementation of t-SNE uses the Barnes-Hut algorithm to
 approximate the gradient at each iteration of gradient descent.  This package
 is a Cython wrapper for FIt-SNE.")
     (license license:bsd-4)))
+
+(define-public velvet
+  ;; Velvet is built with the upstream Makefile (no configure step) after the
+  ;; shipped manual and bundled third-party sources are removed.
+  (package
+    (name "velvet")
+    (version "1.2.10")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
+                                  "velvet_" version ".tgz"))
+              (sha256
+               (base32
+                "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin
+                  ;; Delete the shipped Manual.pdf and the bundled
+                  ;; third-party libraries.
+                  (delete-file "Manual.pdf")
+                  (delete-file-recursively "third-party")
+                  #t))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:make-flags '("OPENMP=t")
+       #:test-target "test"
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         ;; The "third-party" directory is removed by the source snippet, so
+         ;; point the include at plain "zlib.h".  The dots are escaped so
+         ;; that the pattern only matches the literal path.
+         (add-after 'unpack 'fix-zlib-include
+           (lambda _
+             (substitute* "src/binarySequences.c"
+               (("\\.\\./third-party/zlib-1\\.2\\.3/zlib\\.h") "zlib.h"))
+             #t))
+         ;; Replace the install phase: copy the two executables and the
+         ;; manuals into the output by hand.
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin"))
+                    (doc (string-append out "/share/doc/velvet")))
+               (mkdir-p bin)
+               (mkdir-p doc)
+               (install-file "velveth" bin)
+               (install-file "velvetg" bin)
+               ;; NOTE(review): Manual.pdf is deleted by the source snippet,
+               ;; so the file installed here is presumably regenerated
+               ;; during the build with the texlive native input -- confirm.
+               (install-file "Manual.pdf" doc)
+               (install-file "Columbus_manual.pdf" doc)
+               #t))))))
+    ;; NOTE(review): the "OPENMP=t" make flag suggests OpenMP, which is
+    ;; distinct from Open MPI; confirm whether the openmpi input is needed.
+    (inputs
+     `(("openmpi" ,openmpi)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("texlive" ,(texlive-union (list texlive-latex-graphics
+                                        texlive-latex-hyperref)))))
+    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
+    (synopsis "Nucleic acid sequence assembler for very short reads")
+    (description
+     "Velvet is a de novo genomic assembler specially designed for short read
+sequencing technologies, such as Solexa or 454.  Velvet currently takes in
+short read sequences, removes errors then produces high quality unique
+contigs.  It then uses paired read information, if available, to retrieve the
+repeated areas between contigs.")
+    (license license:gpl2+)))