gnu: Separate Python core packages from the rest.
[jackhill/guix/guix.git] / gnu / packages / machine-learning.scm
index 467b44c..b56468f 100644 (file)
@@ -1,5 +1,15 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2015 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2015, 2016, 2017, 2018 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
+;;; Copyright © 2016, 2017 Marius Bakke <mbakke@fastmail.com>
+;;; Copyright © 2016 Hartmut Goebel <h.goebel@crazy-compilers.com>
+;;; Copyright © 2018 Tobias Geerinckx-Rice <me@tobias.gr>
+;;; Copyright © 2018 Kei Kebreau <kkebreau@posteo.net>
+;;; Copyright © 2018 Mark Meyer <mark@ofosos.org>
+;;; Copyright © 2018 Ben Woodcroft <donttrustben@gmail.com>
+;;; Copyright © 2018 Fis Trivial <ybbs.daans@hotmail.com>
+;;; Copyright © 2018 Julien Lepiller <julien@lepiller.eu>
+;;; Copyright © 2018 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
   #:use-module (guix packages)
   #:use-module (guix utils)
   #:use-module (guix download)
+  #:use-module (guix svn-download)
   #:use-module (guix build-system cmake)
   #:use-module (guix build-system gnu)
+  #:use-module (guix build-system ocaml)
+  #:use-module (guix build-system python)
+  #:use-module (guix build-system r)
+  #:use-module (guix git-download)
   #:use-module (gnu packages)
+  #:use-module (gnu packages algebra)
+  #:use-module (gnu packages autotools)
   #:use-module (gnu packages boost)
+  #:use-module (gnu packages check)
   #:use-module (gnu packages compression)
+  #:use-module (gnu packages cran)
+  #:use-module (gnu packages dejagnu)
   #:use-module (gnu packages gcc)
+  #:use-module (gnu packages image)
   #:use-module (gnu packages maths)
+  #:use-module (gnu packages mpi)
+  #:use-module (gnu packages ocaml)
+  #:use-module (gnu packages onc-rpc)
+  #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages python)
+  #:use-module (gnu packages python-xyz)
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages swig)
-  #:use-module (gnu packages xml))
+  #:use-module (gnu packages xml)
+  #:use-module (gnu packages xorg))
+
+(define-public fann
+  ;; The last release is >100 commits behind, so we package from git.
+  (let ((commit "d71d54788bee56ba4cf7522801270152da5209d7"))
+    (package
+      (name "fann")
+      (version (string-append "2.2.0-1." (string-take commit 8)))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/libfann/fann.git")
+                      (commit commit)))
+                (file-name (string-append name "-" version "-checkout"))
+                (sha256
+                 (base32
+                  "0ibwpfrjs6q2lijs8slxjgzb2llcl6rk3v2ski4r6215g5jjhg3x"))))
+      (build-system cmake-build-system)
+      (arguments
+       `(#:phases
+         (modify-phases %standard-phases
+           (replace 'check
+             (lambda* (#:key outputs #:allow-other-keys)
+               (let* ((out (assoc-ref outputs "out")))
+                 (with-directory-excursion (string-append (getcwd) "/tests")
+                   (invoke "./fann_tests"))))))))
+      (home-page "http://leenissen.dk/fann/wp/")
+      (synopsis "Fast Artificial Neural Network")
+      (description
+       "FANN is a free open source neural network library, which implements
+multilayer artificial neural networks in C with support for both fully
+connected and sparsely connected networks.")
+      (license license:lgpl2.1))))
 
 (define-public libsvm
   (package
     (name "libsvm")
-    (version "3.20")
+    (version "3.22")
     (source
      (origin
        (method url-fetch)
-       (uri (string-append
-             "https://github.com/cjlin1/libsvm/archive/v"
-             (string-delete #\. version) ".tar.gz"))
-       (file-name (string-append name "-" version ".tar.gz"))
+       (uri (string-append "https://www.csie.ntu.edu.tw/~cjlin/libsvm/"
+                           name "-" version ".tar.gz"))
        (sha256
         (base32
-         "1jpjlql3frjza7zxzrqqr2firh44fjb8fqsdmvz6bjz7sb47zgp4"))))
+         "0zd7s19y5vb7agczl6456bn45cj1y64739sslaskw1qk7dywd0bd"))))
     (build-system gnu-build-system)
     (arguments
      `(#:tests? #f ;no "check" target
        #:phases (modify-phases %standard-phases
                   (delete 'configure)
                   (replace
-                   'install
+                   'install             ; no ‘install’ target
                    (lambda* (#:key outputs #:allow-other-keys)
                      (let* ((out (assoc-ref outputs "out"))
                             (bin (string-append out "/bin/")))
@@ -85,7 +142,7 @@ classification.")
        (modify-phases %standard-phases
          (delete 'configure)
          (replace
-          'install
+          'install                      ; no ‘install’ target
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((site (string-append (assoc-ref outputs "out")
                                        "/lib/python"
@@ -106,6 +163,168 @@ classification.")
      `(("python" ,python)))
     (synopsis "Python bindings of libSVM")))
 
+(define-public ghmm
+  ;; The latest release candidate is several years and a couple of fixes have
+  ;; been published since.  This is why we download the sources from the SVN
+  ;; repository.
+  (let ((svn-revision 2341))
+    (package
+      (name "ghmm")
+      (version (string-append "0.9-rc3-0." (number->string svn-revision)))
+      (source (origin
+                (method svn-fetch)
+                (uri (svn-reference
+                      (url "http://svn.code.sf.net/p/ghmm/code/trunk")
+                      (revision svn-revision)))
+                (file-name (string-append name "-" version))
+                (sha256
+                 (base32
+                  "0qbq1rqp94l530f043qzp8aw5lj7dng9wq0miffd7spd1ff638wq"))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:imported-modules (,@%gnu-build-system-modules
+                             (guix build python-build-system))
+         #:phases
+         (modify-phases %standard-phases
+           (add-after 'unpack 'enter-dir
+             (lambda _ (chdir "ghmm") #t))
+           (delete 'check)
+           (add-after 'install 'check
+             (assoc-ref %standard-phases 'check))
+           (add-before 'check 'fix-PYTHONPATH
+             (lambda* (#:key inputs outputs #:allow-other-keys)
+               (let ((python-version ((@@ (guix build python-build-system)
+                                           get-python-version)
+                                      (assoc-ref inputs "python"))))
+                 (setenv "PYTHONPATH"
+                         (string-append (getenv "PYTHONPATH")
+                                        ":" (assoc-ref outputs "out")
+                                        "/lib/python" python-version
+                                        "/site-packages")))
+               #t))
+           (add-after 'enter-dir 'fix-runpath
+             (lambda* (#:key outputs #:allow-other-keys)
+               (substitute* "ghmmwrapper/setup.py"
+                 (("^(.*)extra_compile_args = \\[" line indent)
+                  (string-append indent
+                                 "extra_link_args = [\"-Wl,-rpath="
+                                 (assoc-ref outputs "out") "/lib\"],\n"
+                                 line
+                                 "\"-Wl,-rpath="
+                                 (assoc-ref outputs "out")
+                                 "/lib\", ")))
+               #t))
+           (add-after 'enter-dir 'disable-broken-tests
+             (lambda _
+               (substitute* "tests/Makefile.am"
+                 ;; GHMM_SILENT_TESTS is assumed to be a command.
+                 (("TESTS_ENVIRONMENT.*") "")
+                 ;; Do not build broken tests.
+                 (("chmm .*") "")
+                 (("read_fa .*") "")
+                 (("mcmc .*") "")
+                 (("label_higher_order_test.*$")
+                  "label_higher_order_test\n"))
+
+               ;; These Python unittests are broken as there is no gato.
+               ;; See https://sourceforge.net/p/ghmm/support-requests/3/
+               (substitute* "ghmmwrapper/ghmmunittests.py"
+                 (("^(.*)def (testNewXML|testMultipleTransitionClasses|testNewXML)"
+                   line indent)
+                  (string-append indent
+                                 "@unittest.skip(\"Disabled by Guix\")\n"
+                                 line)))
+               #t))
+           (add-after 'disable-broken-tests 'autogen
+             (lambda _
+               (invoke "bash" "autogen.sh"))))))
+      (inputs
+       `(("python" ,python-2) ; only Python 2 is supported
+         ("libxml2" ,libxml2)))
+      (native-inputs
+       `(("pkg-config" ,pkg-config)
+         ("dejagnu" ,dejagnu)
+         ("swig" ,swig)
+         ("autoconf" ,autoconf)
+         ("automake" ,automake)
+         ("libtool" ,libtool)))
+      (home-page "http://ghmm.org")
+      (synopsis "Hidden Markov Model library")
+      (description
+       "The General Hidden Markov Model library (GHMM) is a C library with
+additional Python bindings implementing a wide range of types of @dfn{Hidden
+Markov Models} (HMM) and algorithms: discrete, continuous emissions, basic
+training, HMM clustering, HMM mixtures.")
+      (license license:lgpl2.0+))))
+
+(define-public mcl
+  (package
+    (name "mcl")
+    (version "14.137")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "http://micans.org/mcl/src/mcl-"
+                    (string-replace-substring version "." "-")
+                    ".tar.gz"))
+              (sha256
+               (base32
+                "15xlax3z31lsn62vlg94hkm75nm40q4679amnfg13jm8m2bnhy5m"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:configure-flags (list "--enable-blast")))
+    (inputs
+     `(("perl" ,perl)))
+    (home-page "http://micans.org/mcl/")
+    (synopsis "Clustering algorithm for graphs")
+    (description
+     "The MCL algorithm is short for the @dfn{Markov Cluster Algorithm}, a
+fast and scalable unsupervised cluster algorithm for graphs (also known as
+networks) based on simulation of (stochastic) flow in graphs.")
+    ;; In the LICENCE file and web page it says "The software is licensed
+    ;; under the GNU General Public License, version 3.", but in several of
+    ;; the source code files it suggests GPL3 or later.
+    ;; http://listserver.ebi.ac.uk/pipermail/mcl-users/2016/000376.html
+    (license license:gpl3)))
+
+(define-public ocaml-mcl
+  (package
+    (name "ocaml-mcl")
+    (version "12-068oasis4")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "https://github.com/fhcrc/mcl/archive/"
+             version ".tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32
+         "1l5jbhwjpsj38x8b9698hfpkv75h8hn3kj0gihjhn8ym2cwwv110"))))
+    (build-system ocaml-build-system)
+    (arguments
+     `(#:ocaml ,ocaml-4.02
+       #:findlib ,ocaml4.02-findlib
+       #:phases
+       (modify-phases %standard-phases
+         (add-before 'configure 'patch-paths
+           (lambda _
+             (substitute* "configure"
+               (("SHELL = /bin/sh") (string-append "SHELL = "(which "sh"))))
+             (substitute* "setup.ml"
+               (("LDFLAGS=-fPIC")
+                (string-append "LDFLAGS=-fPIC\"; \"SHELL=" (which "sh"))))
+             #t)))))
+    (home-page "https://github.com/fhcrc/mcl")
+    (synopsis "OCaml wrappers around MCL")
+    (description
+     "This package provides OCaml bindings for the MCL graph clustering
+algorithm.")
+    (license license:gpl3)))
+
+(define-public ocaml4.01-mcl
+  (package-with-ocaml4.01 ocaml-mcl))
+
 (define-public randomjungle
   (package
     (name "randomjungle")
@@ -114,15 +333,17 @@ classification.")
      (origin
        (method url-fetch)
        (uri (string-append
-             "http://www.imbs-luebeck.de/imbs/sites/default/files/u59/"
-             "randomjungle-" version ".tar_.gz"))
+             "https://www.imbs.uni-luebeck.de/fileadmin/files/Software"
+             "/randomjungle/randomjungle-" version ".tar_.gz"))
+       (patches (search-patches "randomjungle-disable-static-build.patch"))
        (sha256
         (base32
          "12c8rf30cla71swx2mf4ww9mfd8jbdw5lnxd7dxhyw1ygrvg6y4w"))))
     (build-system gnu-build-system)
     (arguments
      `(#:configure-flags
-       (list (string-append "--with-boost="
+       (list "--disable-static"
+             (string-append "--with-boost="
                             (assoc-ref %build-inputs "boost")))
        #:phases
        (modify-phases %standard-phases
@@ -137,8 +358,12 @@ classification.")
        ("libxml2" ,libxml2)
        ("zlib" ,zlib)))
     (native-inputs
-     `(("gfortran" ,gfortran)))
-    (home-page "http://www.imbs-luebeck.de/imbs/de/node/227/")
+     `(("gfortran" ,gfortran)
+       ("gfortran:lib" ,gfortran "lib")))
+    ;; Non-portable assembly instructions are used so building fails on
+    ;; platforms other than x86_64 or i686.
+    (supported-systems '("x86_64-linux" "i686-linux"))
+    (home-page "https://www.imbs.uni-luebeck.de/forschung/software/details.html#c224")
     (synopsis "Implementation of the Random Forests machine learning method")
     (description
      "Random Jungle is an implementation of Random Forests.  It is supposed to
@@ -152,7 +377,7 @@ sample proximities between pairs of cases.")
 (define-public shogun
   (package
     (name "shogun")
-    (version "4.0.0")
+    (version "6.1.3")
     (source
      (origin
        (method url-fetch)
@@ -162,7 +387,7 @@ sample proximities between pairs of cases.")
              "/sources/shogun-" version ".tar.bz2"))
        (sha256
         (base32
-         "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb"))
+         "1rn9skm3nw6hr7mr3lgp2gfqhi7ii0lyxck7qmqnf8avq349s5jp"))
        (modules '((guix build utils)
                   (ice-9 rdelim)))
        (snippet
@@ -194,80 +419,86 @@ sample proximities between pairs of cases.")
                                    (and skipping? (not skip-next?)))
                            (display line out))
                          (loop (read-line in 'concat) skip-next?)))))))
-           (for-each delete-ifdefs (find-files "src/shogun/kernel/"
-                                               "^Kernel\\.(cpp|h)"))))))
+           (for-each delete-ifdefs
+                     (append
+                      (find-files "src/shogun/classifier/mkl"
+                                  "^MKLClassification\\.cpp")
+                      (find-files "src/shogun/classifier/svm"
+                                  "^SVMLightOneClass\\.(cpp|h)")
+                      (find-files "src/shogun/multiclass"
+                                  "^ScatterSVM\\.(cpp|h)")
+                      (find-files "src/shogun/kernel/"
+                                  "^(Kernel|CombinedKernel|ProductKernel)\\.(cpp|h)")
+                      (find-files "src/shogun/regression/svr"
+                                  "^(MKLRegression|SVRLight)\\.(cpp|h)")
+                      (find-files "src/shogun/transfer/domain_adaptation"
+                                  "^DomainAdaptationSVM\\.(cpp|h)")))
+           #t))))
     (build-system cmake-build-system)
     (arguments
      '(#:tests? #f ;no check target
        #:phases
-       (alist-cons-after
-        'unpack 'delete-broken-symlinks
-        (lambda _
-          (for-each delete-file '("applications/arts/data"
-                                  "applications/asp/data"
-                                  "applications/easysvm/data"
-                                  "applications/msplicer/data"
-                                  "applications/ocr/data"
-                                  "examples/documented/data"
-                                  "examples/documented/matlab_static"
-                                  "examples/documented/octave_static"
-                                  "examples/undocumented/data"
-                                  "examples/undocumented/matlab_static"
-                                  "examples/undocumented/octave_static"
-                                  "tests/integration/data"
-                                  "tests/integration/matlab_static"
-                                  "tests/integration/octave_static"
-                                  "tests/integration/python_modular/tests"))
-          #t)
-        (alist-cons-after
-         'unpack 'change-R-target-path
-         (lambda* (#:key outputs #:allow-other-keys)
-           (substitute* '("src/interfaces/r_modular/CMakeLists.txt"
-                          "src/interfaces/r_static/CMakeLists.txt"
-                          "examples/undocumented/r_modular/CMakeLists.txt")
-             (("\\$\\{R_COMPONENT_LIB_PATH\\}")
-              (string-append (assoc-ref outputs "out")
-                             "/lib/R/library/")))
-           #t)
-         (alist-cons-after
-          'unpack 'fix-octave-modules
-          (lambda* (#:key outputs #:allow-other-keys)
-            (substitute* '("src/interfaces/octave_modular/CMakeLists.txt"
-                           "src/interfaces/octave_static/CMakeLists.txt")
-              (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}")
-               "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave"))
-
-            ;; change target directory
-            (substitute* "src/interfaces/octave_modular/CMakeLists.txt"
-              (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}")
-               (string-append (assoc-ref outputs "out")
-                              "/share/octave/packages")))
-            #t)
-          (alist-cons-before
-           'build 'set-HOME
+       (modify-phases %standard-phases
+         (add-after 'unpack 'delete-broken-symlinks
+           (lambda _
+             (for-each delete-file '("applications/arts/data"
+                                     "applications/asp/data"
+                                     "applications/easysvm/data"
+                                     "applications/msplicer/data"
+                                     "applications/ocr/data"
+                                     "examples/meta/data"
+                                     "examples/undocumented/data"))
+             #t))
+         (add-after 'unpack 'change-R-target-path
+           (lambda* (#:key outputs #:allow-other-keys)
+             (substitute* '("src/interfaces/r/CMakeLists.txt"
+                            "examples/meta/r/CMakeLists.txt")
+               (("\\$\\{R_COMPONENT_LIB_PATH\\}")
+                (string-append (assoc-ref outputs "out")
+                               "/lib/R/library/")))
+             #t))
+         (add-after 'unpack 'fix-octave-modules
+           (lambda* (#:key outputs #:allow-other-keys)
+             (substitute* "src/interfaces/octave/CMakeLists.txt"
+               (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}")
+                "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave")
+               ;; change target directory
+               (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}")
+                (string-append (assoc-ref outputs "out")
+                               "/share/octave/packages")))
+             (substitute* '("src/interfaces/octave/swig_typemaps.i"
+                            "src/interfaces/octave/sg_print_functions.cpp")
+               ;; "octave/config.h" and "octave/oct-obj.h" deprecated in Octave.
+               (("octave/config\\.h") "octave/octave-config.h")
+               (("octave/oct-obj.h") "octave/ovl.h"))
+             #t))
+         (add-after 'unpack 'move-rxcpp
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let ((rxcpp-dir "shogun/third-party/rxcpp"))
+               (mkdir-p rxcpp-dir)
+               (install-file (assoc-ref inputs "rxcpp") rxcpp-dir)
+               #t)))
+         (add-before 'build 'set-HOME
            ;; $HOME needs to be set at some point during the build phase
-           (lambda _ (setenv "HOME" "/tmp") #t)
-           %standard-phases))))
+           (lambda _ (setenv "HOME" "/tmp") #t)))
        #:configure-flags
        (list "-DCMAKE_BUILD_WITH_INSTALL_RPATH=TRUE"
              "-DUSE_SVMLIGHT=OFF" ;disable proprietary SVMLIGHT
-             ;;"-DJavaModular=ON" ;requires unpackaged jblas
-             ;;"-DRubyModular=ON" ;requires unpackaged ruby-narray
-             ;;"-DPerlModular=ON" ;"FindPerlLibs" does not exist
-             ;;"-DLuaModular=ON"  ;fails because lua doesn't build pkgconfig file
-             "-DOctaveModular=ON"
-             "-DOctaveStatic=ON"
-             "-DPythonModular=ON"
-             "-DPythonStatic=ON"
-             "-DRModular=ON"
-             "-DRStatic=ON"
-             "-DCmdLineStatic=ON")))
+             "-DBUILD_META_EXAMPLES=OFF" ;requires unpackaged ctags
+             ;;"-DINTERFACE_JAVA=ON" ;requires unpackaged jblas
+             ;;"-DINTERFACE_RUBY=ON" ;requires unpackaged ruby-narray
+             ;;"-DINTERFACE_PERL=ON" ;"FindPerlLibs" does not exist
+             ;;"-DINTERFACE_LUA=ON"  ;fails because lua doesn't build pkgconfig file
+             "-DINTERFACE_OCTAVE=ON"
+             "-DINTERFACE_PYTHON=ON"
+             "-DINTERFACE_R=ON")))
     (inputs
      `(("python" ,python)
        ("numpy" ,python-numpy)
-       ("r" ,r)
-       ("octave" ,octave)
+       ("r-minimal" ,r-minimal)
+       ("octave" ,octave-cli)
        ("swig" ,swig)
+       ("eigen" ,eigen)
        ("hdf5" ,hdf5)
        ("atlas" ,atlas)
        ("arpack" ,arpack-ng)
@@ -277,7 +508,11 @@ sample proximities between pairs of cases.")
        ("lzo" ,lzo)
        ("zlib" ,zlib)))
     (native-inputs
-     `(("pkg-config" ,pkg-config)))
+     `(("pkg-config" ,pkg-config)
+       ("rxcpp" ,rxcpp)))
+    ;; Non-portable SSE instructions are used so building fails on platforms
+    ;; other than x86_64.
+    (supported-systems '("x86_64-linux"))
     (home-page "http://shogun-toolbox.org/")
     (synopsis "Machine learning toolbox")
     (description
@@ -287,3 +522,389 @@ combine multiple data representations, algorithm classes, and general purpose
 tools.  This enables both rapid prototyping of data pipelines and extensibility
 in terms of new algorithms.")
     (license license:gpl3+)))
+
+(define-public rxcpp
+  (package
+    (name "rxcpp")
+    (version "4.0.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://github.com/ReactiveX/RxCpp/archive/v"
+                           version ".tar.gz"))
+       (sha256
+        (base32
+         "0y2isr8dy2n1yjr9c5570kpc9lvdlch6jv0jvw000amwn5d3krsh"))
+       (file-name (string-append name "-" version ".tar.gz"))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'remove-werror
+           (lambda _
+             (substitute* (find-files ".")
+               (("-Werror") ""))
+             #t))
+         (replace 'check
+           (lambda _
+             (invoke "ctest"))))))
+    (native-inputs
+     `(("catch" ,catch-framework)))
+    (home-page "http://reactivex.io/")
+    (synopsis "Reactive Extensions for C++")
+    (description
+     "The Reactive Extensions for C++ (RxCpp) is a library of algorithms for
+values-distributed-in-time.  ReactiveX is a library for composing asynchronous
+and event-based programs by using observable sequences.
+
+It extends the observer pattern to support sequences of data and/or events and
+adds operators that allow you to compose sequences together declaratively while
+abstracting away concerns about things like low-level threading,
+synchronization, thread-safety, concurrent data structures, and non-blocking
+I/O.")
+    (license license:asl2.0)))
+
+(define-public r-adaptivesparsity
+  (package
+    (name "r-adaptivesparsity")
+    (version "1.6")
+    (source (origin
+              (method url-fetch)
+              (uri (cran-uri "AdaptiveSparsity" version))
+              (sha256
+               (base32
+                "0imr5m8mll9j6n4icsv6z9rl5kbnwsp9wvzrg7n90nnmcxq2cz91"))))
+    (properties
+     `((upstream-name . "AdaptiveSparsity")))
+    (build-system r-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'link-against-armadillo
+           (lambda _
+             (substitute* "src/Makevars"
+               (("PKG_LIBS=" prefix)
+                (string-append prefix "-larmadillo"))))))))
+    (propagated-inputs
+     `(("r-mass" ,r-mass)
+       ("r-matrix" ,r-matrix)
+       ("r-rcpp" ,r-rcpp)
+       ("r-rcpparmadillo" ,r-rcpparmadillo)))
+    (inputs
+     `(("armadillo" ,armadillo)))
+    (home-page "https://cran.r-project.org/web/packages/AdaptiveSparsity")
+    (synopsis "Adaptive sparsity models")
+    (description
+     "This package implements the Figueiredo machine learning algorithm for
+adaptive sparsity and the Wong algorithm for adaptively sparse gaussian
+geometric models.")
+    (license license:lgpl3+)))
+
+(define-public r-kernlab
+  (package
+    (name "r-kernlab")
+    (version "0.9-27")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cran-uri "kernlab" version))
+       (sha256
+        (base32
+         "1m0xqf6gyvwayz7w3c83y32ayvnlz0jicj8ijk808zq9sh7dbbgn"))))
+    (build-system r-build-system)
+    (home-page "https://cran.r-project.org/web/packages/kernlab")
+    (synopsis "Kernel-based machine learning tools")
+    (description
+     "This package provides kernel-based machine learning methods for
+classification, regression, clustering, novelty detection, quantile regression
+and dimensionality reduction.  Among other methods @code{kernlab} includes
+Support Vector Machines, Spectral Clustering, Kernel PCA, Gaussian Processes
+and a QP solver.")
+    (license license:gpl2)))
+
+(define-public dlib
+  (package
+    (name "dlib")
+    (version "19.7")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "http://dlib.net/files/dlib-" version ".tar.bz2"))
+              (sha256
+               (base32
+                "1mljz02kwkrbggyncxv5fpnyjdybw2qihaacb3js8yfkw12vwpc2"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin
+                  ;; Delete ~13MB of bundled dependencies.
+                  (delete-file-recursively "dlib/external")
+                  (delete-file-recursively "docs/dlib/external")
+                  #t))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'disable-asserts
+           (lambda _
+             ;; config.h recommends explicitly enabling or disabling asserts
+             ;; when building as a shared library. By default neither is set.
+             (substitute* "dlib/config.h"
+               (("^//#define DLIB_DISABLE_ASSERTS") "#define DLIB_DISABLE_ASSERTS"))
+             #t))
+         (add-after 'disable-asserts 'disable-failing-tests
+           (lambda _
+             ;; One test times out on MIPS, so we need to disable it.
+             ;; Others are flaky on some platforms.
+             (let* ((system ,(or (%current-target-system)
+                                 (%current-system)))
+                    (disabled-tests (cond
+                                     ((string-prefix? "mips64" system)
+                                      '("object_detector" ; timeout
+                                        "data_io"))
+                                     ((string-prefix? "armhf" system)
+                                      '("learning_to_track"))
+                                     ((string-prefix? "i686" system)
+                                      '("optimization"))
+                                     (else '()))))
+               (for-each
+                (lambda (test)
+                  (substitute* "dlib/test/makefile"
+                    (((string-append "SRC \\+= " test "\\.cpp")) "")))
+                disabled-tests)
+               #t)))
+         (replace 'check
+           (lambda _
+             ;; No test target, so we build and run the unit tests here.
+             (let ((test-dir (string-append "../dlib-" ,version "/dlib/test")))
+               (with-directory-excursion test-dir
+                 (invoke "make" "-j" (number->string (parallel-job-count)))
+                 (invoke "./dtest" "--runall"))
+               #t)))
+         (add-after 'install 'delete-static-library
+           (lambda* (#:key outputs #:allow-other-keys)
+             (delete-file (string-append (assoc-ref outputs "out")
+                                         "/lib/libdlib.a"))
+             #t)))))
+    (native-inputs
+     `(("pkg-config" ,pkg-config)
+       ;; For tests.
+       ("libnsl" ,libnsl)))
+    (inputs
+     `(("giflib" ,giflib)
+       ("lapack" ,lapack)
+       ("libjpeg" ,libjpeg)
+       ("libpng" ,libpng)
+       ("libx11" ,libx11)
+       ("openblas" ,openblas)
+       ("zlib" ,zlib)))
+    (synopsis
+     "Toolkit for making machine learning and data analysis applications in C++")
+    (description
+     "Dlib is a modern C++ toolkit containing machine learning algorithms and
+tools.  It is used in both industry and academia in a wide range of domains
+including robotics, embedded devices, mobile phones, and large high performance
+computing environments.")
+    (home-page "http://dlib.net")
+    (license license:boost1.0)))
+
+(define-public python-scikit-learn
+  (package
+    (name "python-scikit-learn")
+    (version "0.20.1")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/scikit-learn/scikit-learn.git")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0fkhwg3xn1s7ln9q1szq6kwc4jhwvjh8w4kmv9wcrqy7cq3lbv0d"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'build 'build-ext
+           (lambda _ (invoke "python" "setup.py" "build_ext" "--inplace") #t))
+         (replace 'check
+           (lambda _
+             ;; Restrict OpenBLAS threads to prevent segfaults while testing!
+             (setenv "OPENBLAS_NUM_THREADS" "1")
+
+             ;; Some tests require write access to $HOME.
+             (setenv "HOME" "/tmp")
+
+             (invoke "pytest" "sklearn" "-m" "not network")))
+         ;; FIXME: This fails with permission denied
+         (delete 'reset-gzip-timestamps))))
+    (inputs
+     `(("openblas" ,openblas)))
+    (native-inputs
+     `(("python-pytest" ,python-pytest)
+       ("python-pandas" ,python-pandas) ;for tests
+       ("python-cython" ,python-cython)))
+    (propagated-inputs
+     `(("python-numpy" ,python-numpy)
+       ("python-scipy" ,python-scipy)))
+    (home-page "http://scikit-learn.org/")
+    (synopsis "Machine Learning in Python")
+    (description
+     "Scikit-learn provides simple and efficient tools for data mining and
+data analysis.")
+    (license license:bsd-3)))
+
+(define-public python2-scikit-learn
+  (package-with-python2 python-scikit-learn))
+
+(define-public python-autograd
+  (let* ((commit "442205dfefe407beffb33550846434baa90c4de7")
+         (revision "0")
+         (version (git-version "0.0.0" revision commit)))
+    (package
+      (name "python-autograd")
+      (home-page "https://github.com/HIPS/autograd")
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url home-page)
+                      (commit commit)))
+                (sha256
+                 (base32
+                  "189sv2xb0mwnjawa9z7mrgdglc1miaq93pnck26r28fi1jdwg0z4"))
+                (file-name (git-file-name name version))))
+      (version version)
+      (build-system python-build-system)
+      (native-inputs
+       `(("python-nose" ,python-nose)
+         ("python-pytest" ,python-pytest)))
+      (propagated-inputs
+       `(("python-future" ,python-future)
+         ("python-numpy" ,python-numpy)))
+      (arguments
+       `(#:phases (modify-phases %standard-phases
+                    (replace 'check
+                      (lambda _
+                        (invoke "py.test" "-v"))))))
+      (synopsis "Efficiently computes derivatives of NumPy code")
+      (description "Autograd can automatically differentiate native Python and
+NumPy code.  It can handle a large subset of Python's features, including loops,
+ifs, recursion and closures, and it can even take derivatives of derivatives
+of derivatives.  It supports reverse-mode differentiation
+(a.k.a. backpropagation), which means it can efficiently take gradients of
+scalar-valued functions with respect to array-valued arguments, as well as
+forward-mode differentiation, and the two can be composed arbitrarily.  The
+main intended application of Autograd is gradient-based optimization.")
+      (license license:expat))))
+
+(define-public python2-autograd
+  (package-with-python2 python-autograd))
+
+(define-public lightgbm
+  (package
+    (name "lightgbm")
+    (version "2.0.12")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://github.com/Microsoft/LightGBM/archive/v"
+                    version ".tar.gz"))
+              (sha256
+               (base32
+                "132zf0yk0545mg72hyzxm102g3hpb6ixx9hnf8zd2k55gas6cjj1"))
+              (file-name (string-append name "-" version ".tar.gz"))))
+    (native-inputs
+     `(("python-pytest" ,python-pytest)
+       ("python-nose" ,python-nose)))
+    (inputs
+     `(("openmpi" ,openmpi)))
+    (propagated-inputs
+     `(("python-numpy" ,python-numpy)
+       ("python-scipy" ,python-scipy)))
+    (arguments
+     `(#:configure-flags
+       '("-DUSE_MPI=ON")
+       #:phases
+       (modify-phases %standard-phases
+         (replace 'check
+           (lambda* (#:key outputs #:allow-other-keys)
+             (with-directory-excursion ,(string-append "../LightGBM-" version)
+               (invoke "pytest" "tests/c_api_test/test_.py")))))))
+    (build-system cmake-build-system)
+    (home-page "https://github.com/Microsoft/LightGBM")
+    (synopsis "Gradient boosting framework based on decision tree algorithms")
+    (description "LightGBM is a gradient boosting framework that uses tree
+based learning algorithms.  It is designed to be distributed and efficient with
+the following advantages:
+
+@itemize
+@item Faster training speed and higher efficiency
+@item Lower memory usage
+@item Better accuracy
+@item Parallel and GPU learning supported (not enabled in this package)
+@item Capable of handling large-scale data
+@end itemize\n")
+    (license license:expat)))
+
+(define-public vowpal-wabbit
+  ;; Language bindings not included.
+  (package
+    (name "vowpal-wabbit")
+    (version "8.5.0")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://github.com/JohnLangford/vowpal_wabbit/archive/"
+                    version ".tar.gz"))
+              (sha256
+               (base32
+                "0clp2kb7rk5sckhllxjr5a651awf4s8dgzg4659yh4hf5cqnf0gr"))
+              (file-name (string-append name "-" version ".tar.gz"))))
+    (inputs
+     `(("boost" ,boost)
+       ("zlib" ,zlib)))
+    (arguments
+     `(#:configure-flags
+       (list (string-append "--with-boost="
+                            (assoc-ref %build-inputs "boost")))))
+    (build-system gnu-build-system)
+    (home-page "https://github.com/JohnLangford/vowpal_wabbit")
+    (synopsis "Fast machine learning library for online learning")
+    (description "Vowpal Wabbit is a machine learning system with techniques
+such as online, hashing, allreduce, reductions, learning2search, active, and
+interactive learning.")
+    (license license:bsd-3)))
+
+(define-public python2-fastlmm
+  (package
+    (name "python2-fastlmm")
+    (version "0.2.21")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "fastlmm" version ".zip"))
+       (sha256
+        (base32
+         "1q8c34rpmwkfy3r4d5172pzdkpfryj561897z9r3x22gq7813x1m"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:python ,python-2)) ; only Python 2.7 is supported
+    (propagated-inputs
+     `(("python2-numpy" ,python2-numpy)
+       ("python2-scipy" ,python2-scipy)
+       ("python2-matplotlib" ,python2-matplotlib)
+       ("python2-pandas" ,python2-pandas)
+       ("python2-scikit-learn" ,python2-scikit-learn)
+       ("python2-pysnptools" ,python2-pysnptools)))
+    (native-inputs
+     `(("unzip" ,unzip)
+       ("python2-cython" ,python2-cython)
+       ("python2-mock" ,python2-mock)
+       ("python2-nose" ,python2-nose)))
+    (home-page "http://research.microsoft.com/en-us/um/redmond/projects/mscompbio/fastlmm/")
+    (synopsis "Perform genome-wide association studies on large data sets")
+    (description
+     "FaST-LMM, which stands for Factored Spectrally Transformed Linear Mixed
+Models, is a program for performing both single-SNP and SNP-set genome-wide
+association studies (GWAS) on extremely large data sets.")
+    (license license:asl2.0)))