gnu: Separate Python core packages from the rest.
[jackhill/guix/guix.git] / gnu / packages / machine-learning.scm
index 7790e78..b56468f 100644 (file)
@@ -8,6 +8,8 @@
 ;;; Copyright © 2018 Mark Meyer <mark@ofosos.org>
 ;;; Copyright © 2018 Ben Woodcroft <donttrustben@gmail.com>
 ;;; Copyright © 2018 Fis Trivial <ybbs.daans@hotmail.com>
+;;; Copyright © 2018 Julien Lepiller <julien@lepiller.eu>
+;;; Copyright © 2018 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -53,6 +55,7 @@
   #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages python)
+  #:use-module (gnu packages python-xyz)
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages swig)
   #:use-module (gnu packages xml)
@@ -300,7 +303,9 @@ networks) based on simulation of (stochastic) flow in graphs.")
          "1l5jbhwjpsj38x8b9698hfpkv75h8hn3kj0gihjhn8ym2cwwv110"))))
     (build-system ocaml-build-system)
     (arguments
-     `(#:phases
+     `(#:ocaml ,ocaml-4.02
+       #:findlib ,ocaml4.02-findlib
+       #:phases
        (modify-phases %standard-phases
          (add-before 'configure 'patch-paths
            (lambda _
@@ -328,15 +333,17 @@ algorithm.")
      (origin
        (method url-fetch)
        (uri (string-append
-             "http://www.imbs-luebeck.de/imbs/sites/default/files/u59/"
-             "randomjungle-" version ".tar_.gz"))
+             "https://www.imbs.uni-luebeck.de/fileadmin/files/Software"
+             "/randomjungle/randomjungle-" version ".tar_.gz"))
+       (patches (search-patches "randomjungle-disable-static-build.patch"))
        (sha256
         (base32
          "12c8rf30cla71swx2mf4ww9mfd8jbdw5lnxd7dxhyw1ygrvg6y4w"))))
     (build-system gnu-build-system)
     (arguments
      `(#:configure-flags
-       (list (string-append "--with-boost="
+       (list "--disable-static"
+             (string-append "--with-boost="
                             (assoc-ref %build-inputs "boost")))
        #:phases
        (modify-phases %standard-phases
@@ -356,7 +363,7 @@ algorithm.")
     ;; Non-portable assembly instructions are used so building fails on
     ;; platforms other than x86_64 or i686.
     (supported-systems '("x86_64-linux" "i686-linux"))
-    (home-page "http://www.imbs-luebeck.de/imbs/de/node/227/")
+    (home-page "https://www.imbs.uni-luebeck.de/forschung/software/details.html#c224")
     (synopsis "Implementation of the Random Forests machine learning method")
     (description
      "Random Jungle is an implementation of Random Forests.  It is supposed to
@@ -489,7 +496,7 @@ sample proximities between pairs of cases.")
      `(("python" ,python)
        ("numpy" ,python-numpy)
        ("r-minimal" ,r-minimal)
-       ("octave" ,octave)
+       ("octave" ,octave-cli)
        ("swig" ,swig)
        ("eigen" ,eigen)
        ("hdf5" ,hdf5)
@@ -703,40 +710,39 @@ computing environments.")
 (define-public python-scikit-learn
   (package
     (name "python-scikit-learn")
-    (version "0.19.1")
+    (version "0.20.1")
     (source
      (origin
-       (method url-fetch)
-       (uri (string-append
-             "https://github.com/scikit-learn/scikit-learn/archive/"
-             version ".tar.gz"))
-       (file-name (string-append name "-" version ".tar.gz"))
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/scikit-learn/scikit-learn.git")
+             (commit version)))
+       (file-name (git-file-name name version))
        (sha256
         (base32
-         "18n8775kyfwbvcjjjzda9c5sqy4737c0hrmj6qj1ps2jmlqzair9"))
-       (patches (search-patches
-                "python-scikit-learn-fix-test-non-determinism.patch"))))
+         "0fkhwg3xn1s7ln9q1szq6kwc4jhwvjh8w4kmv9wcrqy7cq3lbv0d"))))
     (build-system python-build-system)
     (arguments
      `(#:phases
        (modify-phases %standard-phases
-         (delete 'check)
-         (add-after 'install 'check
-           ;; Running tests from the source directory requires
-           ;; an "inplace" build with paths relative to CWD.
-           ;; http://scikit-learn.org/stable/developers/advanced_installation.html#testing
-           ;; Use the installed version instead.
-           (lambda* (#:key inputs outputs #:allow-other-keys)
-             (add-installed-pythonpath inputs outputs)
-             ;; some tests require access to "$HOME"
+         (add-after 'build 'build-ext
+           (lambda _ (invoke "python" "setup.py" "build_ext" "--inplace") #t))
+         (replace 'check
+           (lambda _
+             ;; Restrict OpenBLAS threads to prevent segfaults while testing!
+             (setenv "OPENBLAS_NUM_THREADS" "1")
+
+             ;; Some tests require write access to $HOME.
              (setenv "HOME" "/tmp")
-             ;; Step out of the source directory just to be sure.
-             (chdir "..")
-             (invoke "nosetests" "-v" "sklearn"))))))
+
+             (invoke "pytest" "sklearn" "-m" "not network")))
+         ;; FIXME: 'reset-gzip-timestamps fails with "permission denied".
+         (delete 'reset-gzip-timestamps))))
     (inputs
      `(("openblas" ,openblas)))
     (native-inputs
-     `(("python-nose" ,python-nose)
+     `(("python-pytest" ,python-pytest)
+       ("python-pandas" ,python-pandas) ;for tests
        ("python-cython" ,python-cython)))
     (propagated-inputs
      `(("python-numpy" ,python-numpy)
@@ -744,8 +750,8 @@ computing environments.")
     (home-page "http://scikit-learn.org/")
     (synopsis "Machine Learning in Python")
     (description
-     "Scikit-learn provides simple and efficient tools for data
-mining and data analysis.")
+     "Scikit-learn provides simple and efficient tools for data mining and
+data analysis.")
     (license license:bsd-3)))
 
 (define-public python2-scikit-learn
@@ -868,3 +874,37 @@ the following advantages:
 such as online, hashing, allreduce, reductions, learning2search, active, and
 interactive learning.")
     (license license:bsd-3)))
+
+(define-public python2-fastlmm ; FaST-LMM, packaged for Python 2 only
+  (package
+    (name "python2-fastlmm")
+    (version "0.2.21")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "fastlmm" version ".zip")) ; release archive is a .zip
+       (sha256
+        (base32
+         "1q8c34rpmwkfy3r4d5172pzdkpfryj561897z9r3x22gq7813x1m"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:python ,python-2)) ; only Python 2.7 is supported
+    (propagated-inputs ; all Python 2 variants, matching #:python above
+     `(("python2-numpy" ,python2-numpy)
+       ("python2-scipy" ,python2-scipy)
+       ("python2-matplotlib" ,python2-matplotlib)
+       ("python2-pandas" ,python2-pandas)
+       ("python2-scikit-learn" ,python2-scikit-learn)
+       ("python2-pysnptools" ,python2-pysnptools)))
+    (native-inputs
+     `(("unzip" ,unzip) ; the source above is a .zip archive
+       ("python2-cython" ,python2-cython) ; presumably build-time only -- TODO confirm
+       ("python2-mock" ,python2-mock) ; presumably for the tests -- TODO confirm
+       ("python2-nose" ,python2-nose))) ; presumably for the tests -- TODO confirm
+    (home-page "http://research.microsoft.com/en-us/um/redmond/projects/mscompbio/fastlmm/")
+    (synopsis "Perform genome-wide association studies on large data sets")
+    (description
+     "FaST-LMM, which stands for Factored Spectrally Transformed Linear Mixed
+Models, is a program for performing both single-SNP and SNP-set genome-wide
+association studies (GWAS) on extremely large data sets.")
+    (license license:asl2.0)))