X-Git-Url: https://git.hcoop.net/jackhill/guix/guix.git/blobdiff_plain/4adb40bffc0dda8871878283887a0e0cd88d9578..44d10b1f722856ab8e9b942804aa7ef33e2ef739:/gnu/packages/machine-learning.scm diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm index e45c1ae3d3..b56468f8c1 100644 --- a/gnu/packages/machine-learning.scm +++ b/gnu/packages/machine-learning.scm @@ -1,9 +1,15 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2015, 2016, 2017 Ricardo Wurmus +;;; Copyright © 2015, 2016, 2017, 2018 Ricardo Wurmus ;;; Copyright © 2016 Efraim Flashner ;;; Copyright © 2016, 2017 Marius Bakke ;;; Copyright © 2016 Hartmut Goebel ;;; Copyright © 2018 Tobias Geerinckx-Rice +;;; Copyright © 2018 Kei Kebreau +;;; Copyright © 2018 Mark Meyer +;;; Copyright © 2018 Ben Woodcroft +;;; Copyright © 2018 Fis Trivial +;;; Copyright © 2018 Julien Lepiller +;;; Copyright © 2018 Björn Höfling ;;; ;;; This file is part of GNU Guix. ;;; @@ -31,7 +37,9 @@ #:use-module (guix build-system ocaml) #:use-module (guix build-system python) #:use-module (guix build-system r) + #:use-module (guix git-download) #:use-module (gnu packages) + #:use-module (gnu packages algebra) #:use-module (gnu packages autotools) #:use-module (gnu packages boost) #:use-module (gnu packages check) @@ -41,15 +49,50 @@ #:use-module (gnu packages gcc) #:use-module (gnu packages image) #:use-module (gnu packages maths) + #:use-module (gnu packages mpi) #:use-module (gnu packages ocaml) + #:use-module (gnu packages onc-rpc) #:use-module (gnu packages perl) #:use-module (gnu packages pkg-config) #:use-module (gnu packages python) + #:use-module (gnu packages python-xyz) #:use-module (gnu packages statistics) #:use-module (gnu packages swig) #:use-module (gnu packages xml) #:use-module (gnu packages xorg)) +(define-public fann + ;; The last release is >100 commits behind, so we package from git. + (let ((commit "d71d54788bee56ba4cf7522801270152da5209d7")) + (package + (name "fann") + (version (string-append "2.2.0-1." (string-take commit 8))) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/libfann/fann.git") + (commit commit))) + (file-name (string-append name "-" version "-checkout")) + (sha256 + (base32 + "0ibwpfrjs6q2lijs8slxjgzb2llcl6rk3v2ski4r6215g5jjhg3x")))) + (build-system cmake-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (replace 'check + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out"))) + (with-directory-excursion (string-append (getcwd) "/tests") + (invoke "./fann_tests")))))))) + (home-page "http://leenissen.dk/fann/wp/") + (synopsis "Fast Artificial Neural Network") + (description + "FANN is a free open source neural network library, which implements +multilayer artificial neural networks in C with support for both fully +connected and sparsely connected networks.") + (license license:lgpl2.1)))) + (define-public libsvm (package (name "libsvm") @@ -194,7 +237,7 @@ classification.") #t)) (add-after 'disable-broken-tests 'autogen (lambda _ - (zero? (system* "bash" "autogen.sh"))))))) + (invoke "bash" "autogen.sh")))))) (inputs `(("python" ,python-2) ; only Python 2 is supported ("libxml2" ,libxml2))) @@ -260,7 +303,9 @@ networks) based on simulation of (stochastic) flow in graphs.") "1l5jbhwjpsj38x8b9698hfpkv75h8hn3kj0gihjhn8ym2cwwv110")))) (build-system ocaml-build-system) (arguments - `(#:phases + `(#:ocaml ,ocaml-4.02 + #:findlib ,ocaml4.02-findlib + #:phases (modify-phases %standard-phases (add-before 'configure 'patch-paths (lambda _ @@ -288,15 +333,17 @@ algorithm.") (origin (method url-fetch) (uri (string-append - "http://www.imbs-luebeck.de/imbs/sites/default/files/u59/" - "randomjungle-" version ".tar_.gz")) + "https://www.imbs.uni-luebeck.de/fileadmin/files/Software" + "/randomjungle/randomjungle-" version ".tar_.gz")) + (patches (search-patches "randomjungle-disable-static-build.patch")) (sha256 (base32 "12c8rf30cla71swx2mf4ww9mfd8jbdw5lnxd7dxhyw1ygrvg6y4w")))) (build-system gnu-build-system) (arguments `(#:configure-flags - (list (string-append "--with-boost=" + (list "--disable-static" + (string-append "--with-boost=" (assoc-ref %build-inputs "boost"))) #:phases (modify-phases %standard-phases @@ -316,7 +363,7 @@ algorithm.") ;; Non-portable assembly instructions are used so building fails on ;; platforms other than x86_64 or i686. (supported-systems '("x86_64-linux" "i686-linux")) - (home-page "http://www.imbs-luebeck.de/imbs/de/node/227/") + (home-page "https://www.imbs.uni-luebeck.de/forschung/software/details.html#c224") (synopsis "Implementation of the Random Forests machine learning method") (description "Random Jungle is an implementation of Random Forests. It is supposed to @@ -330,7 +377,7 @@ sample proximities between pairs of cases.") (define-public shogun (package (name "shogun") - (version "4.0.0") + (version "6.1.3") (source (origin (method url-fetch) @@ -340,7 +387,7 @@ sample proximities between pairs of cases.") "/sources/shogun-" version ".tar.bz2")) (sha256 (base32 - "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb")) + "1rn9skm3nw6hr7mr3lgp2gfqhi7ii0lyxck7qmqnf8avq349s5jp")) (modules '((guix build utils) (ice-9 rdelim))) (snippet @@ -372,8 +419,21 @@ sample proximities between pairs of cases.") (and skipping? (not skip-next?))) (display line out)) (loop (read-line in 'concat) skip-next?))))))) - (for-each delete-ifdefs (find-files "src/shogun/kernel/" - "^Kernel\\.(cpp|h)")))))) + (for-each delete-ifdefs + (append + (find-files "src/shogun/classifier/mkl" + "^MKLClassification\\.cpp") + (find-files "src/shogun/classifier/svm" + "^SVMLightOneClass\\.(cpp|h)") + (find-files "src/shogun/multiclass" + "^ScatterSVM\\.(cpp|h)") + (find-files "src/shogun/kernel/" + "^(Kernel|CombinedKernel|ProductKernel)\\.(cpp|h)") + (find-files "src/shogun/regression/svr" + "^(MKLRegression|SVRLight)\\.(cpp|h)") + (find-files "src/shogun/transfer/domain_adaptation" + "^DomainAdaptationSVM\\.(cpp|h)"))) + #t)))) (build-system cmake-build-system) (arguments '(#:tests? #f ;no check target @@ -386,62 +446,59 @@ sample proximities between pairs of cases.") "applications/easysvm/data" "applications/msplicer/data" "applications/ocr/data" - "examples/documented/data" - "examples/documented/matlab_static" - "examples/documented/octave_static" - "examples/undocumented/data" - "examples/undocumented/matlab_static" - "examples/undocumented/octave_static" - "tests/integration/data" - "tests/integration/matlab_static" - "tests/integration/octave_static" - "tests/integration/python_modular/tests")) + "examples/meta/data" + "examples/undocumented/data")) #t)) (add-after 'unpack 'change-R-target-path (lambda* (#:key outputs #:allow-other-keys) - (substitute* '("src/interfaces/r_modular/CMakeLists.txt" - "src/interfaces/r_static/CMakeLists.txt" - "examples/undocumented/r_modular/CMakeLists.txt") + (substitute* '("src/interfaces/r/CMakeLists.txt" + "examples/meta/r/CMakeLists.txt") (("\\$\\{R_COMPONENT_LIB_PATH\\}") (string-append (assoc-ref outputs "out") "/lib/R/library/"))) #t)) (add-after 'unpack 'fix-octave-modules (lambda* (#:key outputs #:allow-other-keys) - (substitute* '("src/interfaces/octave_modular/CMakeLists.txt" - "src/interfaces/octave_static/CMakeLists.txt") + (substitute* "src/interfaces/octave/CMakeLists.txt" (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}") - "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave")) - - ;; change target directory - (substitute* "src/interfaces/octave_modular/CMakeLists.txt" + "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave") + ;; change target directory (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}") (string-append (assoc-ref outputs "out") "/share/octave/packages"))) + (substitute* '("src/interfaces/octave/swig_typemaps.i" + "src/interfaces/octave/sg_print_functions.cpp") + ;; "octave/config.h" and "octave/oct-obj.h" deprecated in Octave. + (("octave/config\\.h") "octave/octave-config.h") + (("octave/oct-obj.h") "octave/ovl.h")) #t)) + (add-after 'unpack 'move-rxcpp + (lambda* (#:key inputs #:allow-other-keys) + (let ((rxcpp-dir "shogun/third-party/rxcpp")) + (mkdir-p rxcpp-dir) + (install-file (assoc-ref inputs "rxcpp") rxcpp-dir) + #t))) (add-before 'build 'set-HOME ;; $HOME needs to be set at some point during the build phase (lambda _ (setenv "HOME" "/tmp") #t))) #:configure-flags (list "-DCMAKE_BUILD_WITH_INSTALL_RPATH=TRUE" "-DUSE_SVMLIGHT=OFF" ;disable proprietary SVMLIGHT - ;;"-DJavaModular=ON" ;requires unpackaged jblas - ;;"-DRubyModular=ON" ;requires unpackaged ruby-narray - ;;"-DPerlModular=ON" ;"FindPerlLibs" does not exist - ;;"-DLuaModular=ON" ;fails because lua doesn't build pkgconfig file - "-DOctaveModular=ON" - "-DOctaveStatic=ON" - "-DPythonModular=ON" - "-DPythonStatic=ON" - "-DRModular=ON" - "-DRStatic=ON" - "-DCmdLineStatic=ON"))) + "-DBUILD_META_EXAMPLES=OFF" ;requires unpackaged ctags + ;;"-DINTERFACE_JAVA=ON" ;requires unpackaged jblas + ;;"-DINTERFACE_RUBY=ON" ;requires unpackaged ruby-narray + ;;"-DINTERFACE_PERL=ON" ;"FindPerlLibs" does not exist + ;;"-DINTERFACE_LUA=ON" ;fails because lua doesn't build pkgconfig file + "-DINTERFACE_OCTAVE=ON" + "-DINTERFACE_PYTHON=ON" + "-DINTERFACE_R=ON"))) (inputs `(("python" ,python) ("numpy" ,python-numpy) ("r-minimal" ,r-minimal) - ("octave" ,octave) + ("octave" ,octave-cli) ("swig" ,swig) + ("eigen" ,eigen) ("hdf5" ,hdf5) ("atlas" ,atlas) ("arpack" ,arpack-ng) @@ -451,7 +508,8 @@ sample proximities between pairs of cases.") ("lzo" ,lzo) ("zlib" ,zlib))) (native-inputs - `(("pkg-config" ,pkg-config))) + `(("pkg-config" ,pkg-config) + ("rxcpp" ,rxcpp))) ;; Non-portable SSE instructions are used so building fails on platforms ;; other than x86_64. (supported-systems '("x86_64-linux")) @@ -465,16 +523,57 @@ tools. This enables both rapid prototyping of data pipelines and extensibility in terms of new algorithms.") (license license:gpl3+))) +(define-public rxcpp + (package + (name "rxcpp") + (version "4.0.0") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/ReactiveX/RxCpp/archive/v" + version ".tar.gz")) + (sha256 + (base32 + "0y2isr8dy2n1yjr9c5570kpc9lvdlch6jv0jvw000amwn5d3krsh")) + (file-name (string-append name "-" version ".tar.gz")))) + (build-system cmake-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'remove-werror + (lambda _ + (substitute* (find-files ".") + (("-Werror") "")) + #t)) + (replace 'check + (lambda _ + (invoke "ctest")))))) + (native-inputs + `(("catch" ,catch-framework))) + (home-page "http://reactivex.io/") + (synopsis "Reactive Extensions for C++") + (description + "The Reactive Extensions for C++ (RxCpp) is a library of algorithms for +values-distributed-in-time. ReactiveX is a library for composing asynchronous +and event-based programs by using observable sequences. + +It extends the observer pattern to support sequences of data and/or events and +adds operators that allow you to compose sequences together declaratively while +abstracting away concerns about things like low-level threading, +synchronization, thread-safety, concurrent data structures, and non-blocking +I/O.") + (license license:asl2.0))) + (define-public r-adaptivesparsity (package (name "r-adaptivesparsity") - (version "1.4") + (version "1.6") (source (origin (method url-fetch) (uri (cran-uri "AdaptiveSparsity" version)) (sha256 (base32 - "1az7isvalf3kmdiycrfl6s9k9xqk22k1mc6rh8v0jmcz402qyq8z")))) + "0imr5m8mll9j6n4icsv6z9rl5kbnwsp9wvzrg7n90nnmcxq2cz91")))) (properties `((upstream-name . "AdaptiveSparsity"))) (build-system r-build-system) @@ -487,7 +586,9 @@ in terms of new algorithms.") (("PKG_LIBS=" prefix) (string-append prefix "-larmadillo")))))))) (propagated-inputs - `(("r-rcpp" ,r-rcpp) + `(("r-mass" ,r-mass) + ("r-matrix" ,r-matrix) + ("r-rcpp" ,r-rcpp) ("r-rcpparmadillo" ,r-rcpparmadillo))) (inputs `(("armadillo" ,armadillo))) @@ -502,14 +603,14 @@ geometric models.") (define-public r-kernlab (package (name "r-kernlab") - (version "0.9-25") + (version "0.9-27") (source (origin (method url-fetch) (uri (cran-uri "kernlab" version)) (sha256 (base32 - "0qnaq9x3j2xc6jrmmd98wc6hkzch487s4p3a9lnc00xvahkhgpmr")))) + "1m0xqf6gyvwayz7w3c83y32ayvnlz0jicj8ijk808zq9sh7dbbgn")))) (build-system r-build-system) (home-page "https://cran.r-project.org/web/packages/kernlab") (synopsis "Kernel-based machine learning tools") @@ -576,15 +677,18 @@ and a QP solver.") ;; No test target, so we build and run the unit tests here. (let ((test-dir (string-append "../dlib-" ,version "/dlib/test"))) (with-directory-excursion test-dir - (and (zero? (system* "make" "-j" (number->string (parallel-job-count)))) - (zero? (system* "./dtest" "--runall"))))))) + (invoke "make" "-j" (number->string (parallel-job-count))) + (invoke "./dtest" "--runall")) + #t))) (add-after 'install 'delete-static-library (lambda* (#:key outputs #:allow-other-keys) (delete-file (string-append (assoc-ref outputs "out") "/lib/libdlib.a")) #t))))) (native-inputs - `(("pkg-config" ,pkg-config))) + `(("pkg-config" ,pkg-config) + ;; For tests. + ("libnsl" ,libnsl))) (inputs `(("giflib" ,giflib) ("lapack" ,lapack) @@ -606,40 +710,39 @@ computing environments.") (define-public python-scikit-learn (package (name "python-scikit-learn") - (version "0.19.1") + (version "0.20.1") (source (origin - (method url-fetch) - (uri (string-append - "https://github.com/scikit-learn/scikit-learn/archive/" - version ".tar.gz")) - (file-name (string-append name "-" version ".tar.gz")) + (method git-fetch) + (uri (git-reference + (url "https://github.com/scikit-learn/scikit-learn.git") + (commit version))) + (file-name (git-file-name name version)) (sha256 (base32 - "18n8775kyfwbvcjjjzda9c5sqy4737c0hrmj6qj1ps2jmlqzair9")) - (patches (search-patches - "python-scikit-learn-fix-test-non-determinism.patch")))) + "0fkhwg3xn1s7ln9q1szq6kwc4jhwvjh8w4kmv9wcrqy7cq3lbv0d")))) (build-system python-build-system) (arguments `(#:phases (modify-phases %standard-phases - (delete 'check) - (add-after 'install 'check - ;; Running tests from the source directory requires - ;; an "inplace" build with paths relative to CWD. - ;; http://scikit-learn.org/stable/developers/advanced_installation.html#testing - ;; Use the installed version instead. - (lambda* (#:key inputs outputs #:allow-other-keys) - (add-installed-pythonpath inputs outputs) - ;; some tests require access to "$HOME" + (add-after 'build 'build-ext + (lambda _ (invoke "python" "setup.py" "build_ext" "--inplace") #t)) + (replace 'check + (lambda _ + ;; Restrict OpenBLAS threads to prevent segfaults while testing! + (setenv "OPENBLAS_NUM_THREADS" "1") + + ;; Some tests require write access to $HOME. (setenv "HOME" "/tmp") - ;; Step out of the source directory just to be sure. - (chdir "..") - (zero? (system* "nosetests" "-v" "sklearn"))))))) + + (invoke "pytest" "sklearn" "-m" "not network"))) + ;; FIXME: This fails with permission denied + (delete 'reset-gzip-timestamps)))) (inputs `(("openblas" ,openblas))) (native-inputs - `(("python-nose" ,python-nose) + `(("python-pytest" ,python-pytest) + ("python-pandas" ,python-pandas) ;for tests ("python-cython" ,python-cython))) (propagated-inputs `(("python-numpy" ,python-numpy) @@ -647,9 +750,161 @@ computing environments.") (home-page "http://scikit-learn.org/") (synopsis "Machine Learning in Python") (description - "Scikit-learn provides simple and efficient tools for data -mining and data analysis.") + "Scikit-learn provides simple and efficient tools for data mining and +data analysis.") (license license:bsd-3))) (define-public python2-scikit-learn (package-with-python2 python-scikit-learn)) + +(define-public python-autograd + (let* ((commit "442205dfefe407beffb33550846434baa90c4de7") + (revision "0") + (version (git-version "0.0.0" revision commit))) + (package + (name "python-autograd") + (home-page "https://github.com/HIPS/autograd") + (source (origin + (method git-fetch) + (uri (git-reference + (url home-page) + (commit commit))) + (sha256 + (base32 + "189sv2xb0mwnjawa9z7mrgdglc1miaq93pnck26r28fi1jdwg0z4")) + (file-name (git-file-name name version)))) + (version version) + (build-system python-build-system) + (native-inputs + `(("python-nose" ,python-nose) + ("python-pytest" ,python-pytest))) + (propagated-inputs + `(("python-future" ,python-future) + ("python-numpy" ,python-numpy))) + (arguments + `(#:phases (modify-phases %standard-phases + (replace 'check + (lambda _ + (invoke "py.test" "-v")))))) + (synopsis "Efficiently computes derivatives of NumPy code") + (description "Autograd can automatically differentiate native Python and +NumPy code. It can handle a large subset of Python's features, including loops, +ifs, recursion and closures, and it can even take derivatives of derivatives +of derivatives. It supports reverse-mode differentiation +(a.k.a. backpropagation), which means it can efficiently take gradients of +scalar-valued functions with respect to array-valued arguments, as well as +forward-mode differentiation, and the two can be composed arbitrarily. The +main intended application of Autograd is gradient-based optimization.") + (license license:expat)))) + +(define-public python2-autograd + (package-with-python2 python-autograd)) + +(define-public lightgbm + (package + (name "lightgbm") + (version "2.0.12") + (source (origin + (method url-fetch) + (uri (string-append + "https://github.com/Microsoft/LightGBM/archive/v" + version ".tar.gz")) + (sha256 + (base32 + "132zf0yk0545mg72hyzxm102g3hpb6ixx9hnf8zd2k55gas6cjj1")) + (file-name (string-append name "-" version ".tar.gz")))) + (native-inputs + `(("python-pytest" ,python-pytest) + ("python-nose" ,python-nose))) + (inputs + `(("openmpi" ,openmpi))) + (propagated-inputs + `(("python-numpy" ,python-numpy) + ("python-scipy" ,python-scipy))) + (arguments + `(#:configure-flags + '("-DUSE_MPI=ON") + #:phases + (modify-phases %standard-phases + (replace 'check + (lambda* (#:key outputs #:allow-other-keys) + (with-directory-excursion ,(string-append "../LightGBM-" version) + (invoke "pytest" "tests/c_api_test/test_.py"))))))) + (build-system cmake-build-system) + (home-page "https://github.com/Microsoft/LightGBM") + (synopsis "Gradient boosting framework based on decision tree algorithms") + (description "LightGBM is a gradient boosting framework that uses tree +based learning algorithms. It is designed to be distributed and efficient with +the following advantages: + +@itemize +@item Faster training speed and higher efficiency +@item Lower memory usage +@item Better accuracy +@item Parallel and GPU learning supported (not enabled in this package) +@item Capable of handling large-scale data +@end itemize\n") + (license license:expat))) + +(define-public vowpal-wabbit + ;; Language bindings not included. + (package + (name "vowpal-wabbit") + (version "8.5.0") + (source (origin + (method url-fetch) + (uri (string-append + "https://github.com/JohnLangford/vowpal_wabbit/archive/" + version ".tar.gz")) + (sha256 + (base32 + "0clp2kb7rk5sckhllxjr5a651awf4s8dgzg4659yh4hf5cqnf0gr")) + (file-name (string-append name "-" version ".tar.gz")))) + (inputs + `(("boost" ,boost) + ("zlib" ,zlib))) + (arguments + `(#:configure-flags + (list (string-append "--with-boost=" + (assoc-ref %build-inputs "boost"))))) + (build-system gnu-build-system) + (home-page "https://github.com/JohnLangford/vowpal_wabbit") + (synopsis "Fast machine learning library for online learning") + (description "Vowpal Wabbit is a machine learning system with techniques +such as online, hashing, allreduce, reductions, learning2search, active, and +interactive learning.") + (license license:bsd-3))) + +(define-public python2-fastlmm + (package + (name "python2-fastlmm") + (version "0.2.21") + (source + (origin + (method url-fetch) + (uri (pypi-uri "fastlmm" version ".zip")) + (sha256 + (base32 + "1q8c34rpmwkfy3r4d5172pzdkpfryj561897z9r3x22gq7813x1m")))) + (build-system python-build-system) + (arguments + `(#:python ,python-2)) ; only Python 2.7 is supported + (propagated-inputs + `(("python2-numpy" ,python2-numpy) + ("python2-scipy" ,python2-scipy) + ("python2-matplotlib" ,python2-matplotlib) + ("python2-pandas" ,python2-pandas) + ("python2-scikit-learn" ,python2-scikit-learn) + ("python2-pysnptools" ,python2-pysnptools))) + (native-inputs + `(("unzip" ,unzip) + ("python2-cython" ,python2-cython) + ("python2-mock" ,python2-mock) + ("python2-nose" ,python2-nose))) + (home-page "http://research.microsoft.com/en-us/um/redmond/projects/mscompbio/fastlmm/") + (synopsis "Perform genome-wide association studies on large data sets") + (description + "FaST-LMM, which stands for Factored Spectrally Transformed Linear Mixed +Models, is a program for performing both single-SNP and SNP-set genome-wide +association studies (GWAS) on extremely large data sets.") + (license license:asl2.0)))