gnu: Replace uses of 'libjpeg' with 'libjpeg-turbo'.
[jackhill/guix/guix.git] / gnu / packages / machine-learning.scm
index 245ccb9..d92a74e 100644 (file)
@@ -11,6 +11,8 @@
 ;;; Copyright © 2018 Julien Lepiller <julien@lepiller.eu>
 ;;; Copyright © 2018 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
 ;;; Copyright © 2019 Nicolas Goaziou <mail@nicolasgoaziou.fr>
+;;; Copyright © 2019 Guillaume Le Vaillant <glv@posteo.net>
+;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -33,6 +35,7 @@
   #:use-module (guix utils)
   #:use-module (guix download)
   #:use-module (guix svn-download)
+  #:use-module (guix build-system asdf)
   #:use-module (guix build-system cmake)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system ocaml)
   #:use-module (gnu packages dejagnu)
   #:use-module (gnu packages gcc)
   #:use-module (gnu packages glib)
+  #:use-module (gnu packages graphviz)
   #:use-module (gnu packages gstreamer)
   #:use-module (gnu packages image)
   #:use-module (gnu packages linux)
+  #:use-module (gnu packages lisp-xyz)
   #:use-module (gnu packages maths)
   #:use-module (gnu packages mpi)
   #:use-module (gnu packages ocaml)
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages protobuf)
   #:use-module (gnu packages python)
+  #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
   #:use-module (gnu packages python-xyz)
   #:use-module (gnu packages serialization)
+  #:use-module (gnu packages sphinx)
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages sqlite)
   #:use-module (gnu packages swig)
       (home-page "http://leenissen.dk/fann/wp/")
       (synopsis "Fast Artificial Neural Network")
       (description
-       "FANN is a free open source neural network library, which implements
-multilayer artificial neural networks in C with support for both fully
-connected and sparsely connected networks.")
+       "FANN is a neural network library, which implements multilayer
+artificial neural networks in C with support for both fully connected and
+sparsely connected networks.")
       (license license:lgpl2.1))))
 
 (define-public libsvm
@@ -191,7 +198,7 @@ classification.")
                 (uri (svn-reference
                       (url "http://svn.code.sf.net/p/ghmm/code/trunk")
                       (revision svn-revision)))
-                (file-name (string-append name "-" version))
+                (file-name (string-append name "-" version "-checkout"))
                 (sha256
                  (base32
                   "0qbq1rqp94l530f043qzp8aw5lj7dng9wq0miffd7spd1ff638wq"))))
@@ -208,8 +215,7 @@ classification.")
              (assoc-ref %standard-phases 'check))
            (add-before 'check 'fix-PYTHONPATH
              (lambda* (#:key inputs outputs #:allow-other-keys)
-               (let ((python-version ((@@ (guix build python-build-system)
-                                           get-python-version)
+               (let ((python-version (python-version
                                       (assoc-ref inputs "python"))))
                  (setenv "PYTHONPATH"
                          (string-append (getenv "PYTHONPATH")
@@ -249,10 +255,7 @@ classification.")
                   (string-append indent
                                  "@unittest.skip(\"Disabled by Guix\")\n"
                                  line)))
-               #t))
-           (add-after 'disable-broken-tests 'autogen
-             (lambda _
-               (invoke "bash" "autogen.sh"))))))
+               #t)))))
       (inputs
        `(("python" ,python-2) ; only Python 2 is supported
          ("libxml2" ,libxml2)))
@@ -775,7 +778,7 @@ than 8 bits, and at the end only some significant 8 bits are kept.")
     (inputs
      `(("giflib" ,giflib)
        ("lapack" ,lapack)
-       ("libjpeg" ,libjpeg)
+       ("libjpeg" ,libjpeg-turbo)
        ("libpng" ,libpng)
        ("libx11" ,libx11)
        ("openblas" ,openblas)
@@ -793,7 +796,7 @@ computing environments.")
 (define-public python-scikit-learn
   (package
     (name "python-scikit-learn")
-    (version "0.20.3")
+    (version "0.20.4")
     (source
      (origin
        (method git-fetch)
@@ -803,7 +806,7 @@ computing environments.")
        (file-name (git-file-name name version))
        (sha256
         (base32
-         "08aaby5zphfxy83mggg35bwyka7wk91l2qijh8kk0bl08dikq8dl"))))
+         "08zbzi8yx5wdlxfx9jap61vg1malc9ajf576w7a0liv6jvvrxlpj"))))
     (build-system python-build-system)
     (arguments
      `(#:phases
@@ -819,8 +822,14 @@ computing environments.")
              (setenv "HOME" "/tmp")
 
              (invoke "pytest" "sklearn" "-m" "not network")))
-         ;; FIXME: This fails with permission denied
-         (delete 'reset-gzip-timestamps))))
+         (add-before 'reset-gzip-timestamps 'make-files-writable
+           (lambda* (#:key outputs #:allow-other-keys)
+             ;; Make sure .gz files are writable so that the
+             ;; 'reset-gzip-timestamps' phase can do its work.
+             (let ((out (assoc-ref outputs "out")))
+               (for-each make-file-writable
+                         (find-files out "\\.gz$"))
+               #t))))))
     (inputs
      `(("openblas" ,openblas)))
     (native-inputs
@@ -1138,16 +1147,16 @@ written in C++.")
            (replace 'configure
              (lambda* (#:key inputs #:allow-other-keys)
                (let ((glib (assoc-ref inputs "glib")))
-                 (setenv "CXXFLAGS" "-std=c++11 -fPIC")
+                 (setenv "CXXFLAGS" "-fPIC")
                  (setenv "CPLUS_INCLUDE_PATH"
                          (string-append glib "/include/glib-2.0:"
                                         glib "/lib/glib-2.0/include:"
                                         (assoc-ref inputs "gstreamer")
-                                        "/include/gstreamer-1.0:"
-                                        (getenv "CPLUS_INCLUDE_PATH"))))
+                                        "/include/gstreamer-1.0")))
                (substitute* "Makefile"
                  (("include \\$\\(KALDI_ROOT\\)/src/kaldi.mk") "")
-                 (("\\$\\(error Cannot find") "#"))))
+                 (("\\$\\(error Cannot find") "#"))
+               #t))
            (add-before 'build 'build-depend
              (lambda* (#:key make-flags #:allow-other-keys)
                (apply invoke "make" "depend" make-flags)))
@@ -1291,20 +1300,20 @@ Python.")
              "-DgRPC_SSL_PROVIDER=package"
              "-DgRPC_PROTOBUF_PROVIDER=package")))
     (inputs
-     `(("c-ares" ,c-ares-next)
+     `(("c-ares" ,c-ares/cmake)
        ("openssl" ,openssl)
        ("zlib" ,zlib)))
     (native-inputs
-     `(("protobuf" ,protobuf-next)
+     `(("protobuf" ,protobuf)
        ("python" ,python-wrapper)))
     (home-page "https://grpc.io")
     (synopsis "High performance universal RPC framework")
-    (description "gRPC is a modern open source high performance @dfn{Remote
-Procedure Call} (RPC) framework that can run in any environment.  It can
-efficiently connect services in and across data centers with pluggable support
-for load balancing, tracing, health checking and authentication.  It is also
-applicable in last mile of distributed computing to connect devices, mobile
-applications and browsers to backend services.")
+    (description "gRPC is a modern high performance @dfn{Remote Procedure Call}
+(RPC) framework that can run in any environment.  It can efficiently connect
+services in and across data centers with pluggable support for load balancing,
+tracing, health checking and authentication.  It is also applicable in last
+mile of distributed computing to connect devices, mobile applications and
+browsers to backend services.")
     (license license:asl2.0)))
 
 ;; Note that Tensorflow includes a "third_party" directory, which seems to not
@@ -1600,9 +1609,12 @@ INSTALL_RPATH " (assoc-ref outputs "out") "/lib)\n")))
                #t))))))
     (native-inputs
      `(("pkg-config" ,pkg-config)
-       ("protobuf:native" ,protobuf-next) ; protoc
-       ("protobuf:src" ,(package-source protobuf-next))
+       ("protobuf:native" ,protobuf-3.6) ; protoc
+       ("protobuf:src" ,(package-source protobuf-3.6))
        ("eigen:src" ,(package-source eigen-for-tensorflow))
+       ;; install_pip_packages.sh wants setuptools 39.1.0 specifically.
+       ("python-setuptools" ,python-setuptools-for-tensorflow)
+
        ;; The commit hashes and URLs for third-party source code are taken
        ;; from "tensorflow/workspace.bzl".
        ("boringssl-src"
@@ -1725,23 +1737,23 @@ INSTALL_RPATH " (assoc-ref outputs "out") "/lib)\n")))
        ("python-gast" ,python-gast)
        ("python-grpcio" ,python-grpcio)
        ("python-numpy" ,python-numpy)
-       ("python-protobuf" ,python-protobuf-next)
+       ("python-protobuf" ,python-protobuf-3.6)
        ("python-six" ,python-six)
        ("python-termcolo" ,python-termcolor)
        ("python-wheel" ,python-wheel)))
     (inputs
-     `(("c-ares" ,c-ares-next)
+     `(("c-ares" ,c-ares)
        ("eigen" ,eigen-for-tensorflow)
        ("gemmlowp" ,gemmlowp-for-tensorflow)
        ("lmdb" ,lmdb)
-       ("libjpeg" ,libjpeg)
+       ("libjpeg" ,libjpeg-turbo)
        ("libpng" ,libpng)
        ("giflib" ,giflib)
        ("grpc" ,grpc)
        ("jsoncpp" ,jsoncpp-for-tensorflow)
        ("snappy" ,snappy)
        ("sqlite" ,sqlite)
-       ("protobuf" ,protobuf-next)
+       ("protobuf" ,protobuf-3.6)
        ("python" ,python-wrapper)
        ("zlib" ,zlib)))
     (home-page "https://tensorflow.org")
@@ -1781,3 +1793,327 @@ explain the predictions of machine learning models (or really the output of
 any function).  It currently contains the interface and IO code from the Shap
 project, and it will potentially also do the same for the Lime project.")
     (license license:expat)))
+
+(define-public python-keras-applications
+  (package
+    (name "python-keras-applications")
+    (version "1.0.8")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "Keras_Applications" version))
+       (sha256
+        (base32
+         "1rcz31ca4axa6kzhjx4lwqxbg4wvlljkj8qj9a7p9sfd5fhzjyam"))))
+    (build-system python-build-system)
+    ;; The tests require Keras, but this package is needed to build Keras.
+    (arguments '(#:tests? #f))
+    (propagated-inputs
+     `(("python-h5py" ,python-h5py)
+       ("python-numpy" ,python-numpy)))
+    (native-inputs
+     `(("python-pytest" ,python-pytest)
+       ("python-pytest-cov" ,python-pytest-cov)
+       ("python-pytest-pep8" ,python-pytest-pep8)
+       ("python-pytest-xdist" ,python-pytest-xdist)))
+    (home-page "https://github.com/keras-team/keras-applications")
+    (synopsis "Reference implementations of popular deep learning models")
+    (description
+     "This package provides reference implementations of popular deep learning
+models for use with the Keras deep learning framework.")
+    (license license:expat)))
+
+(define-public python-keras-preprocessing
+  (package
+    (name "python-keras-preprocessing")
+    (version "1.1.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "Keras_Preprocessing" version))
+       (sha256
+        (base32
+         "1r98nm4k1svsqjyaqkfk23i31bl1kcfcyp7094yyj3c43phfp3as"))))
+    (build-system python-build-system)
+    (propagated-inputs
+     `(("python-numpy" ,python-numpy)
+       ("python-six" ,python-six)))
+    (native-inputs
+     `(("python-pandas" ,python-pandas)
+       ("python-pillow" ,python-pillow)
+       ("python-pytest" ,python-pytest)
+       ("python-pytest-cov" ,python-pytest-cov)
+       ("python-pytest-xdist" ,python-pytest-xdist)
+       ("tensorflow" ,tensorflow)))
+    (home-page "https://github.com/keras-team/keras-preprocessing/")
+    (synopsis "Data preprocessing and augmentation for deep learning models")
+    (description
+     "Keras Preprocessing is the data preprocessing and data augmentation
+module of the Keras deep learning library.  It provides utilities for working
+with image data, text data, and sequence data.")
+    (license license:expat)))
+
+(define-public python-keras
+  (package
+    (name "python-keras")
+    (version "2.2.4")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "Keras" version))
+       (patches (search-patches "python-keras-integration-test.patch"))
+       (sha256
+        (base32
+         "1j8bsqzh49vjdxy6l1k4iwax5vpjzniynyd041xjavdzvfii1dlh"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'remove-tests-for-unavailable-features
+           (lambda _
+             (delete-file "keras/backend/theano_backend.py")
+             (delete-file "keras/backend/cntk_backend.py")
+             (delete-file "tests/keras/backend/backend_test.py")
+
+             ;; FIXME: This doesn't work because Tensorflow is missing the
+             ;; coder ops library.
+             (delete-file "tests/keras/test_callbacks.py")
+             #t))
+         (replace 'check
+           (lambda _
+             ;; These tests attempt to download data files from the internet.
+             (delete-file "tests/integration_tests/test_datasets.py")
+             (delete-file "tests/integration_tests/imagenet_utils_test.py")
+
+             (setenv "PYTHONPATH"
+                     (string-append (getcwd) "/build/lib:"
+                                    (getenv "PYTHONPATH")))
+             (invoke "py.test" "-v"
+                     "-p" "no:cacheprovider"
+                     "--ignore" "keras/utils"))))))
+    (propagated-inputs
+     `(("python-h5py" ,python-h5py)
+       ("python-keras-applications" ,python-keras-applications)
+       ("python-keras-preprocessing" ,python-keras-preprocessing)
+       ("python-numpy" ,python-numpy)
+       ("python-pydot" ,python-pydot)
+       ("python-pyyaml" ,python-pyyaml)
+       ("python-scipy" ,python-scipy)
+       ("python-six" ,python-six)
+       ("tensorflow" ,tensorflow)
+       ("graphviz" ,graphviz)))
+    (native-inputs
+     `(("python-pandas" ,python-pandas)
+       ("python-pytest" ,python-pytest)
+       ("python-pytest-cov" ,python-pytest-cov)
+       ("python-pytest-pep8" ,python-pytest-pep8)
+       ("python-pytest-timeout" ,python-pytest-timeout)
+       ("python-pytest-xdist" ,python-pytest-xdist)
+       ("python-sphinx" ,python-sphinx)
+       ("python-requests" ,python-requests)))
+    (home-page "https://github.com/keras-team/keras")
+    (synopsis "High-level deep learning framework")
+    (description "Keras is a high-level neural networks API, written in Python
+and capable of running on top of TensorFlow.  It was developed with a focus on
+enabling fast experimentation.  Use Keras if you need a deep learning library
+that:
+
+@itemize
+@item Allows for easy and fast prototyping (through user friendliness,
+  modularity, and extensibility).
+@item Supports both convolutional networks and recurrent networks, as well as
+  combinations of the two.
+@item Runs seamlessly on CPU and GPU.
+@end itemize\n")
+    (license license:expat)))
+
+(define-public sbcl-cl-libsvm-format
+  (let ((commit "3300f84fd8d9f5beafc114f543f9d83417c742fb")
+        (revision "0"))
+    (package
+      (name "sbcl-cl-libsvm-format")
+      (version (git-version "0.1.0" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/masatoi/cl-libsvm-format.git")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "0284aj84xszhkhlivaigf9qj855fxad3mzmv3zfr0qzb5k0nzwrg"))))
+      (build-system asdf-build-system/sbcl)
+      (native-inputs
+       `(("prove" ,sbcl-prove)
+         ("prove-asdf" ,sbcl-prove-asdf)))
+      (inputs
+       `(("alexandria" ,sbcl-alexandria)))
+      (synopsis "LibSVM data format reader for Common Lisp")
+      (description
+       "This Common Lisp library provides a fast reader for data in LibSVM
+format.")
+      (home-page "https://github.com/masatoi/cl-libsvm-format")
+      (license license:expat))))
+
+(define-public cl-libsvm-format
+  (sbcl-package->cl-source-package sbcl-cl-libsvm-format))
+
+(define-public ecl-cl-libsvm-format
+  (sbcl-package->ecl-package sbcl-cl-libsvm-format))
+
+(define-public sbcl-cl-online-learning
+  (let ((commit "fc7a34f4f161cd1c7dd747d2ed8f698947781423")
+        (revision "0"))
+    (package
+      (name "sbcl-cl-online-learning")
+      (version (git-version "0.5" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/masatoi/cl-online-learning.git")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "14x95rlg80ay5hv645ki57pqvy12v28hz4k1w0f6bsfi2rmpxchq"))))
+      (build-system asdf-build-system/sbcl)
+      (native-inputs
+       `(("prove" ,sbcl-prove)
+         ("prove-asdf" ,sbcl-prove-asdf)))
+      (inputs
+       `(("cl-libsvm-format" ,sbcl-cl-libsvm-format)
+         ("cl-store" ,sbcl-cl-store)))
+      (arguments
+       `(;; FIXME: Tests pass but then the check phase crashes
+         #:tests? #f))
+      (synopsis "Online Machine Learning for Common Lisp")
+      (description
+       "This library contains a collection of machine learning algorithms for
+online linear classification written in Common Lisp.")
+      (home-page "https://github.com/masatoi/cl-online-learning")
+      (license license:expat))))
+
+(define-public cl-online-learning
+  (sbcl-package->cl-source-package sbcl-cl-online-learning))
+
+(define-public ecl-cl-online-learning
+  (sbcl-package->ecl-package sbcl-cl-online-learning))
+
+(define-public sbcl-cl-random-forest
+  (let ((commit "85fbdd4596d40e824f70f1b7cf239cf544e49d51")
+        (revision "0"))
+    (package
+      (name "sbcl-cl-random-forest")
+      (version (git-version "0.1" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/masatoi/cl-random-forest.git")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "097xv60i1ndz68sg9p4pc7c5gvyp9i1xgw966b4wwfq3x6hbz421"))))
+      (build-system asdf-build-system/sbcl)
+      (native-inputs
+       `(("prove" ,sbcl-prove)
+         ("prove-asdf" ,sbcl-prove-asdf)
+         ("trivial-garbage" ,sbcl-trivial-garbage)))
+      (inputs
+       `(("alexandria" ,sbcl-alexandria)
+         ("cl-libsvm-format" ,sbcl-cl-libsvm-format)
+         ("cl-online-learning" ,sbcl-cl-online-learning)
+         ("lparallel" ,sbcl-lparallel)))
+      (arguments
+       `(;; The tests download data from the Internet
+         #:tests? #f
+         #:phases
+         (modify-phases %standard-phases
+           (add-after 'unpack 'add-sb-cltl2-dependency
+             (lambda _
+               ;; sb-cltl2 is required by lparallel when using sbcl, but it is
+               ;; not loaded automatically.
+               (substitute* "cl-random-forest.asd"
+                 (("\\(in-package :cl-user\\)")
+                  "(in-package :cl-user) #+sbcl (require :sb-cltl2)"))
+               #t)))))
+      (synopsis "Random Forest and Global Refinement for Common Lisp")
+      (description
+       "CL-random-forest is an implementation of Random Forest for multiclass
+classification and univariate regression written in Common Lisp.  It also
+includes an implementation of Global Refinement of Random Forest.")
+      (home-page "https://github.com/masatoi/cl-random-forest")
+      (license license:expat))))
+
+(define-public cl-random-forest
+  (sbcl-package->cl-source-package sbcl-cl-random-forest))
+
+(define-public ecl-cl-random-forest
+  (sbcl-package->ecl-package sbcl-cl-random-forest))
+
+(define-public gloo
+  (let ((version "0.0.0") ; no proper version tag
+        (commit "ca528e32fea9ca8f2b16053cff17160290fc84ce")
+        (revision "0"))
+    (package
+      (name "gloo")
+      (version (git-version version revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/facebookincubator/gloo.git")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "1q9f80zy75f6njrzrqkmhc0g3qxs4gskr7ns2jdqanxa2ww7a99w"))))
+      (build-system cmake-build-system)
+      (native-inputs
+       `(("googletest" ,googletest)))
+      (arguments
+       `(#:configure-flags '("-DBUILD_TEST=1")
+         #:phases
+         (modify-phases %standard-phases
+           (replace 'check
+             (lambda _
+               (invoke "make" "gloo_test")
+               #t)))))
+      (synopsis "Collective communications library")
+      (description
+       "Gloo is a collective communications library.  It comes with a
+number of collective algorithms useful for machine learning applications.
+These include a barrier, broadcast, and allreduce.")
+      (home-page "https://github.com/facebookincubator/gloo")
+      (license license:bsd-3))))
+
+(define-public python-umap-learn
+  (package
+    (name "python-umap-learn")
+    (version "0.3.10")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "umap-learn" version))
+       (sha256
+        (base32
+         "02ada2yy6km6zgk2836kg1c97yrcpalvan34p8c57446finnpki1"))))
+    (build-system python-build-system)
+    (native-inputs
+     `(("python-nose" ,python-nose)))
+    (propagated-inputs
+     `(("python-numba" ,python-numba)
+       ("python-numpy" ,python-numpy)
+       ("python-scikit-learn" ,python-scikit-learn)
+       ("python-scipy" ,python-scipy)))
+    (home-page "https://github.com/lmcinnes/umap")
+    (synopsis
+     "Uniform Manifold Approximation and Projection")
+    (description
+     "Uniform Manifold Approximation and Projection is a dimension reduction
+technique that can be used for visualisation similarly to t-SNE, but also for
+general non-linear dimension reduction.")
+    (license license:bsd-3)))