gnu: dlib: Do not build dlib twice for tests.
[jackhill/guix/guix.git] / gnu / packages / machine-learning.scm
CommitLineData
741115b6 1;;; GNU Guix --- Functional package management for GNU
23aab4ab 2;;; Copyright © 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
25e0037a 3;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
5f0ff6a9 4;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
741115b6
RW
5;;;
6;;; This file is part of GNU Guix.
7;;;
8;;; GNU Guix is free software; you can redistribute it and/or modify it
9;;; under the terms of the GNU General Public License as published by
10;;; the Free Software Foundation; either version 3 of the License, or (at
11;;; your option) any later version.
12;;;
13;;; GNU Guix is distributed in the hope that it will be useful, but
14;;; WITHOUT ANY WARRANTY; without even the implied warranty of
15;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16;;; GNU General Public License for more details.
17;;;
18;;; You should have received a copy of the GNU General Public License
19;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20
21(define-module (gnu packages machine-learning)
22 #:use-module ((guix licenses) #:prefix license:)
23 #:use-module (guix packages)
24 #:use-module (guix utils)
25 #:use-module (guix download)
23aab4ab 26 #:use-module (guix svn-download)
c1670a81 27 #:use-module (guix build-system cmake)
741115b6 28 #:use-module (guix build-system gnu)
8406138b 29 #:use-module (guix build-system r)
71f80f54 30 #:use-module (gnu packages)
23aab4ab 31 #:use-module (gnu packages autotools)
0931c609
RW
32 #:use-module (gnu packages boost)
33 #:use-module (gnu packages compression)
23aab4ab 34 #:use-module (gnu packages dejagnu)
0931c609 35 #:use-module (gnu packages gcc)
5f0ff6a9 36 #:use-module (gnu packages image)
0931c609 37 #:use-module (gnu packages maths)
791c11d6 38 #:use-module (gnu packages perl)
c1670a81 39 #:use-module (gnu packages pkg-config)
0931c609 40 #:use-module (gnu packages python)
c1670a81
RW
41 #:use-module (gnu packages statistics)
42 #:use-module (gnu packages swig)
5f0ff6a9
MB
43 #:use-module (gnu packages xml)
44 #:use-module (gnu packages xorg))
741115b6
RW
45
;; LIBSVM: the command-line tools (svm-train, svm-predict, svm-scale).
(define-public libsvm
  (package
    (name "libsvm")
    (version "3.20")
    (source
     (origin
       (method url-fetch)
       ;; The archive name carries the version without its dot ("v320").
       (uri (string-append
             "https://github.com/cjlin1/libsvm/archive/v"
             (string-delete #\. version) ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1jpjlql3frjza7zxzrqqr2firh44fjb8fqsdmvz6bjz7sb47zgp4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy the three executables into the output's bin directory
             ;; by hand.
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p target)
               (for-each (lambda (program)
                           (copy-file program
                                      (string-append target program)))
                         (list "svm-train" "svm-predict" "svm-scale")))
             #t)))))
    (home-page "http://www.csie.ntu.edu.tw/~cjlin/libsvm/")
    (synopsis "Library for Support Vector Machines")
    (description
     "LIBSVM is a machine learning library for support vector
classification, (C-SVC, nu-SVC), regression (epsilon-SVR, nu-SVR) and
distribution estimation (one-class SVM). It supports multi-class
classification.")
    (license license:bsd-3)))
71f80f54
RW
85
;; Python bindings for LIBSVM, built from the same source as `libsvm'.
(define-public python-libsvm
  (package (inherit libsvm)
    (name "python-libsvm")
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags '("-C" "python")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Derive "MAJOR.MINOR" from the version suffix of the Python
             ;; store item (".../HASH-python-3.5.2" -> "3.5").  Splitting on
             ;; the last dash and on dots, rather than taking a fixed number
             ;; of characters, keeps this correct for versions such as
             ;; "2.7.12" or "3.10.1".
             (let* ((python      (assoc-ref inputs "python"))
                    (version     (substring python
                                            (+ 1 (string-rindex python #\-))))
                    (components  (string-split version #\.))
                    (major+minor (string-append (car components) "."
                                                (cadr components)))
                    (site        (string-append (assoc-ref outputs "out")
                                                "/lib/python" major+minor
                                                "/site-packages/")))
               ;; The shared library is installed next to svm.py, so drop the
               ;; reference to the parent directory.  The dots are escaped so
               ;; that only the literal file name is matched.
               (substitute* "python/svm.py"
                 (("\\.\\./libsvm\\.so\\.2") "libsvm.so.2"))
               (mkdir-p site)
               (for-each (lambda (file)
                           (copy-file file
                                      (string-append site (basename file))))
                         ;; Anchor the pattern so that only "*.py" files are
                         ;; picked up.
                         (find-files "python" "\\.py$"))
               (copy-file "libsvm.so.2"
                          (string-append site "libsvm.so.2")))
             #t)))))
    (inputs
     `(("python" ,python)))
    (synopsis "Python bindings of libSVM")))
0931c609 117
23aab4ab
RW
(define-public ghmm
  ;; The latest release candidate is several years old and a couple of fixes
  ;; have been published since.  This is why we download the sources from the
  ;; SVN repository.
  (let ((svn-revision 2341))
    (package
      (name "ghmm")
      (version (string-append "0.9-rc3-0." (number->string svn-revision)))
      (source (origin
                (method svn-fetch)
                (uri (svn-reference
                      (url "http://svn.code.sf.net/p/ghmm/code/trunk")
                      (revision svn-revision)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "0qbq1rqp94l530f043qzp8aw5lj7dng9wq0miffd7spd1ff638wq"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; The checkout contains the library in the "ghmm" sub-directory.
           (add-after 'unpack 'enter-dir
             (lambda _ (chdir "ghmm") #t))
           (add-after 'enter-dir 'fix-PYTHONPATH
             (lambda _
               ;; The Python tests fail as the library is assumed to be stored
               ;; in ./build/lib.linux-i686-*.  To fix this we detect the CPU
               ;; and use it in the path.
               (substitute* "configure.in"
                 (("AM_INIT_AUTOMAKE" line)
                  (string-append line "\nAC_CANONICAL_HOST\n")))
               (substitute* "ghmmwrapper/Makefile.am"
                 (("i686") "@host_cpu@"))
               #t))
           (add-after 'enter-dir 'fix-runpath
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Add an rpath pointing at this package's own "lib" directory
               ;; to both the link and the compile arguments of the Python
               ;; extension.
               (substitute* "ghmmwrapper/setup.py"
                 (("^(.*)extra_compile_args = \\[" line indent)
                  (string-append indent
                                 "extra_link_args = [\"-Wl,-rpath="
                                 (assoc-ref outputs "out") "/lib\"],\n"
                                 line
                                 "\"-Wl,-rpath="
                                 (assoc-ref outputs "out")
                                 "/lib\", ")))
               #t))
           (add-after 'enter-dir 'disable-broken-tests
             (lambda _
               (substitute* "tests/Makefile.am"
                 ;; GHMM_SILENT_TESTS is assumed to be a command.
                 (("TESTS_ENVIRONMENT.*") "")
                 ;; Do not build broken tests.
                 (("chmm .*") "")
                 (("read_fa .*") "")
                 (("mcmc .*") "")
                 (("label_higher_order_test.*$")
                  "label_higher_order_test\n"))

               ;; These Python unittests are broken as there is no gato.
               ;; See https://sourceforge.net/p/ghmm/support-requests/3/
               (substitute* "ghmmwrapper/ghmmunittests.py"
                 (("^(.*)def (testNewXML|testMultipleTransitionClasses)"
                   line indent)
                  (string-append indent
                                 "@unittest.skip(\"Disabled by Guix\")\n"
                                 line)))
               #t))
           ;; The SVN checkout ships no generated "configure" script.
           (add-before 'configure 'autogen
             (lambda _
               (zero? (system* "bash" "./autogen.sh")))))))
      (inputs
       `(("python" ,python-2)           ; only Python 2 is supported
         ("libxml2" ,libxml2)))
      (native-inputs
       `(("pkg-config" ,pkg-config)
         ("dejagnu" ,dejagnu)
         ("swig" ,swig)
         ("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (home-page "http://ghmm.org")
      (synopsis "Hidden Markov Model library")
      (description
       "The General Hidden Markov Model library (GHMM) is a C library with
additional Python bindings implementing a wide range of types of @dfn{Hidden
Markov Models} (HMM) and algorithms: discrete, continuous emissions, basic
training, HMM clustering, HMM mixtures.")
      (license license:lgpl2.0+))))
206
791c11d6
BW
;; MCL: the Markov Cluster Algorithm for graph clustering.
(define-public mcl
  (package
    (name "mcl")
    (version "14.137")
    (source
     (origin
       (method url-fetch)
       ;; The tarball name uses a dash where the version has a dot
       ;; ("mcl-14-137.tar.gz").
       (uri (string-append "http://micans.org/mcl/src/mcl-"
                           (string-replace-substring version "." "-")
                           ".tar.gz"))
       (sha256
        (base32
         "15xlax3z31lsn62vlg94hkm75nm40q4679amnfg13jm8m2bnhy5m"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags '("--enable-blast")))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://micans.org/mcl/")
    (synopsis "Clustering algorithm for graphs")
    (description
     "The MCL algorithm is short for the @dfn{Markov Cluster Algorithm}, a
fast and scalable unsupervised cluster algorithm for graphs (also known as
networks) based on simulation of (stochastic) flow in graphs.")
    ;; In the LICENCE file and web page it says "The software is licensed
    ;; under the GNU General Public License, version 3.", but in several of
    ;; the source code files it suggests GPL3 or later.
    ;; http://listserver.ebi.ac.uk/pipermail/mcl-users/2016/000376.html
    (license license:gpl3)))
236
0931c609
RW
;; Random Jungle: an implementation of the Random Forests method.
(define-public randomjungle
  (package
    (name "randomjungle")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.imbs-luebeck.de/imbs/sites/default/files/u59/"
             "randomjungle-" version ".tar_.gz"))
       (sha256
        (base32
         "12c8rf30cla71swx2mf4ww9mfd8jbdw5lnxd7dxhyw1ygrvg6y4w"))))
    (build-system gnu-build-system)
    (arguments
     `(;; Tell the configure script where the "boost" input lives.
       #:configure-flags
       (list (string-append "--with-boost="
                            (assoc-ref %build-inputs "boost")))
       #:phases
       (modify-phases %standard-phases
         ;; Export CXXFLAGS before the configure script runs.
         (add-before 'configure 'set-CXXFLAGS
           (lambda _
             (setenv "CXXFLAGS" "-fpermissive ")
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("libxml2" ,libxml2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("gfortran" ,gfortran)
       ("gfortran:lib" ,gfortran "lib")))
    (home-page "http://www.imbs-luebeck.de/imbs/de/node/227/")
    (synopsis "Implementation of the Random Forests machine learning method")
    (description
     "Random Jungle is an implementation of Random Forests. It is supposed to
analyse high dimensional data. In genetics, it can be used for analysing big
Genome Wide Association (GWA) data. Random Forests is a powerful machine
learning method. Most interesting features are variable selection, missing
value imputation, classifier creation, generalization error estimation and
sample proximities between pairs of cases.")
    (license license:gpl3+)))
c1670a81
RW
280
;; Shogun machine learning toolbox, built with its Octave, Python, R and
;; command-line interfaces and with the non-free SVMLight code removed.
(define-public shogun
  (package
    (name "shogun")
    (version "4.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://shogun-toolbox.org/shogun/releases/"
             (version-major+minor version)
             "/sources/shogun-" version ".tar.bz2"))
       (sha256
        (base32
         "159nlijnb7mnrv9za80wnm1shwvy45hgrqzn51hxy7gw4z6d6fdb"))
       (modules '((guix build utils)
                  (ice-9 rdelim)))
       (snippet
        '(begin
           ;; Remove non-free sources and files referencing them
           (for-each delete-file
                     (find-files "src/shogun/classifier/svm/"
                                 "SVMLight\\.(cpp|h)"))
           (for-each delete-file
                     (find-files "examples/undocumented/libshogun/"
                                 (string-append
                                  "(classifier_.*svmlight.*|"
                                  "evaluation_cross_validation_locked_comparison).cpp")))
           ;; Remove non-free functions.
           ;; DELETE-IFDEFS rewrites FILE, dropping every line strictly
           ;; between "#ifdef USE_SVMLIGHT" and "#endif //USE_SVMLIGHT"; the
           ;; two marker lines themselves are kept.
           (define (delete-ifdefs file)
             (with-atomic-file-replacement file
               (lambda (in out)
                 (let loop ((line (read-line in 'concat))
                            (skipping? #f))
                   (if (eof-object? line)
                       #t
                       (let ((skip-next?
                              (or (and skipping?
                                       (not (string-prefix?
                                             "#endif //USE_SVMLIGHT" line)))
                                  (string-prefix?
                                   "#ifdef USE_SVMLIGHT" line))))
                         (when (or (not skipping?)
                                   (and skipping? (not skip-next?)))
                           (display line out))
                         (loop (read-line in 'concat) skip-next?)))))))
           (for-each delete-ifdefs (find-files "src/shogun/kernel/"
                                               "^Kernel\\.(cpp|h)"))))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:phases
       ;; Each post-unpack phase is anchored on the previous one so that they
       ;; run in the order in which they are written here.
       (modify-phases %standard-phases
         (add-after 'unpack 'delete-broken-symlinks
           (lambda _
             (for-each delete-file '("applications/arts/data"
                                     "applications/asp/data"
                                     "applications/easysvm/data"
                                     "applications/msplicer/data"
                                     "applications/ocr/data"
                                     "examples/documented/data"
                                     "examples/documented/matlab_static"
                                     "examples/documented/octave_static"
                                     "examples/undocumented/data"
                                     "examples/undocumented/matlab_static"
                                     "examples/undocumented/octave_static"
                                     "tests/integration/data"
                                     "tests/integration/matlab_static"
                                     "tests/integration/octave_static"
                                     "tests/integration/python_modular/tests"))
             #t))
         (add-after 'delete-broken-symlinks 'change-R-target-path
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install the R interface below this package's own output.
             (substitute* '("src/interfaces/r_modular/CMakeLists.txt"
                            "src/interfaces/r_static/CMakeLists.txt"
                            "examples/undocumented/r_modular/CMakeLists.txt")
               (("\\$\\{R_COMPONENT_LIB_PATH\\}")
                (string-append (assoc-ref outputs "out")
                               "/lib/R/library/")))
             #t))
         (add-after 'change-R-target-path 'fix-octave-modules
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Also search the "octave" sub-directory of the Octave include
             ;; directories.
             (substitute* '("src/interfaces/octave_modular/CMakeLists.txt"
                            "src/interfaces/octave_static/CMakeLists.txt")
               (("^include_directories\\(\\$\\{OCTAVE_INCLUDE_DIRS\\}")
                "include_directories(${OCTAVE_INCLUDE_DIRS} ${OCTAVE_INCLUDE_DIRS}/octave"))

             ;; change target directory
             (substitute* "src/interfaces/octave_modular/CMakeLists.txt"
               (("\\$\\{OCTAVE_OCT_LOCAL_API_FILE_DIR\\}")
                (string-append (assoc-ref outputs "out")
                               "/share/octave/packages")))
             #t))
         (add-before 'build 'set-HOME
           ;; $HOME needs to be set at some point during the build phase
           (lambda _ (setenv "HOME" "/tmp") #t)))
       #:configure-flags
       (list "-DCMAKE_BUILD_WITH_INSTALL_RPATH=TRUE"
             "-DUSE_SVMLIGHT=OFF" ;disable proprietary SVMLIGHT
             ;;"-DJavaModular=ON" ;requires unpackaged jblas
             ;;"-DRubyModular=ON" ;requires unpackaged ruby-narray
             ;;"-DPerlModular=ON" ;"FindPerlLibs" does not exist
             ;;"-DLuaModular=ON" ;fails because lua doesn't build pkgconfig file
             "-DOctaveModular=ON"
             "-DOctaveStatic=ON"
             "-DPythonModular=ON"
             "-DPythonStatic=ON"
             "-DRModular=ON"
             "-DRStatic=ON"
             "-DCmdLineStatic=ON")))
    (inputs
     `(("python" ,python)
       ("numpy" ,python-numpy)
       ("r" ,r)
       ("octave" ,octave)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("atlas" ,atlas)
       ("arpack" ,arpack-ng)
       ("lapack" ,lapack)
       ("glpk" ,glpk)
       ("libxml2" ,libxml2)
       ("lzo" ,lzo)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://shogun-toolbox.org/")
    (synopsis "Machine learning toolbox")
    (description
     "The Shogun Machine learning toolbox provides a wide range of unified and
efficient Machine Learning (ML) methods. The toolbox seamlessly allows to
combine multiple data representations, algorithm classes, and general purpose
tools. This enables both rapid prototyping of data pipelines and extensibility
in terms of new algorithms.")
    (license license:gpl3+)))
8406138b
RW
422
;; CRAN package "AdaptiveSparsity".
(define-public r-adaptivesparsity
  (package
    (name "r-adaptivesparsity")
    (version "1.4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "AdaptiveSparsity" version))
              (sha256
               (base32
                "1az7isvalf3kmdiycrfl6s9k9xqk22k1mc6rh8v0jmcz402qyq8z"))))
    (properties
     `((upstream-name . "AdaptiveSparsity")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-against-armadillo
           (lambda _
             ;; Prepend "-larmadillo" to the libraries listed in PKG_LIBS.
             (substitute* "src/Makevars"
               (("PKG_LIBS=" prefix)
                (string-append prefix "-larmadillo")))
             ;; Return #t explicitly, like the other phases in this file,
             ;; rather than the unspecified value of `substitute*'.
             #t)))))
    (propagated-inputs
     `(("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)))
    (home-page "http://cran.r-project.org/web/packages/AdaptiveSparsity")
    (synopsis "Adaptive sparsity models")
    (description
     "This package implements the Figueiredo machine learning algorithm for
adaptive sparsity and the Wong algorithm for adaptively sparse Gaussian
geometric models.")
    (license license:lgpl3+)))
e4785eb8
RW
454
;; CRAN package "nnet", built with the stock R build system and no custom
;; phases or extra inputs.
(define-public r-nnet
  (package
    (name "r-nnet")
    (version "7.3-12")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "nnet" version))
              (sha256
               (base32
                "17amqnw9dpap2w8ivx53hxha2xrm0drwfnj32li0xk41hlz548r7"))))
    (build-system r-build-system)
    (home-page "http://www.stats.ox.ac.uk/pub/MASS4/")
    (synopsis "Feed-forward neural networks and multinomial log-linear models")
    (description
     "This package provides functions for feed-forward neural networks with a
single hidden layer, and for multinomial log-linear models.")
    ;; Two licenses are listed for this package.
    (license (list license:gpl2+
                   license:gpl3+))))
5f0ff6a9
MB
473
;; Dlib C++ toolkit, built as a shared library against system dependencies.
(define-public dlib
  (package
    (name "dlib")
    (version "19.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://dlib.net/files/dlib-" version ".tar.bz2"))
              (sha256
               (base32
                "0p2pvcdalc6jhb6r99ybvjd9x74sclr0ngswdg9j2xl5pj7knbr4"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete ~13MB of bundled dependencies.
                  (delete-file-recursively "dlib/external")
                  (delete-file-recursively "docs/dlib/external")))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'disable-asserts
           (lambda _
             ;; config.h recommends explicitly enabling or disabling asserts
             ;; when building as a shared library.  By default neither is set.
             (substitute* "dlib/config.h"
               (("^//#define DLIB_DISABLE_ASSERTS") "#define DLIB_DISABLE_ASSERTS"))
             #t))
         (replace 'check
           (lambda _
             ;; No test target, so we build and run the unit tests here.
             ;; The path is relative to the current directory of this phase,
             ;; a sibling of the unpacked "dlib-VERSION" source tree.
             (let ((test-dir (string-append "../dlib-" ,version "/dlib/test")))
               (with-directory-excursion test-dir
                 (setenv "CXXFLAGS" "-std=gnu++11")
                 (and (zero? (system* "make" "-j"
                                      (number->string (parallel-job-count))))
                      (zero? (system* "./dtest" "--runall")))))))
         (add-after 'install 'delete-static-library
           (lambda* (#:key outputs #:allow-other-keys)
             (delete-file (string-append (assoc-ref outputs "out")
                                         "/lib/libdlib.a"))
             ;; Return #t explicitly, like the other phases in this file,
             ;; rather than the unspecified value of `delete-file'.
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("giflib" ,giflib)
       ;("lapack" ,lapack) XXX lapack here causes test failures in some setups.
       ("libjpeg" ,libjpeg)
       ("libpng" ,libpng)
       ("libx11" ,libx11)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (synopsis
     "Toolkit for making machine learning and data analysis applications in C++")
    (description
     "Dlib is a modern C++ toolkit containing machine learning algorithms and
tools. It is used in both industry and academia in a wide range of domains
including robotics, embedded devices, mobile phones, and large high performance
computing environments.")
    (home-page "http://dlib.net")
    (license license:boost1.0)))