gnu/packages/speech.scm

   1 ;;; GNU Guix --- Functional package management for GNU
   2 ;;; Copyright © 2016 David Thompson <davet@gnu.org>
   3 ;;; Copyright © 2016, 2019, 2020 Marius Bakke <mbakke@fastmail.com>
   4 ;;; Copyright © 2017 Leo Famulari <leo@famulari.name>
   5 ;;; Copyright © 2018, 2020 Tobias Geerinckx-Rice <me@tobias.gr>
   6 ;;; Copyright © 2016 Kei Kebreau <kkebreau@posteo.net>
   7 ;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
   8 ;;; Copyright © 2020 Nicolas Goaziou <mail@nicolasgoaziou.fr>
   9 ;;;
  10 ;;; This file is part of GNU Guix.
  11 ;;;
  12 ;;; GNU Guix is free software; you can redistribute it and/or modify it
  13 ;;; under the terms of the GNU General Public License as published by
  14 ;;; the Free Software Foundation; either version 3 of the License, or (at
  15 ;;; your option) any later version.
  16 ;;;
  17 ;;; GNU Guix is distributed in the hope that it will be useful, but
  18 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 ;;; GNU General Public License for more details.
  21 ;;;
  22 ;;; You should have received a copy of the GNU General Public License
  23 ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
  24
  25 (define-module (gnu packages speech)
  26   #:use-module ((guix licenses) #:prefix license:)
  27   #:use-module (guix packages)
  28   #:use-module (guix download)
  29   #:use-module (guix git-download)
  30   #:use-module (guix utils)
  31   #:use-module (guix build-system gnu)
  32   #:use-module (gnu packages)
  33   #:use-module (gnu packages audio)
  34   #:use-module (gnu packages autotools)
  35   #:use-module (gnu packages base)            ;for 'which'
  36   #:use-module (gnu packages bison)
  37   #:use-module (gnu packages compression)
  38   #:use-module (gnu packages documentation)
  39   #:use-module (gnu packages emacs)
  40   #:use-module (gnu packages gcc)
  41   #:use-module (gnu packages glib)
  42   #:use-module (gnu packages gstreamer)
  43   #:use-module (gnu packages linux)
  44   #:use-module (gnu packages ncurses)
  45   #:use-module (gnu packages perl)
  46   #:use-module (gnu packages pkg-config)
  47   #:use-module (gnu packages pulseaudio)
  48   #:use-module (gnu packages python)
  49   #:use-module (gnu packages swig)
  50   #:use-module (gnu packages texinfo)
  51   #:use-module (gnu packages textutils))
  52
  53 (define-public espeak
  54   (package
  55     (name "espeak")
  56     (version "1.48.04")
  57     (source
  58      (origin
  59        (method url-fetch)
  60        (uri (string-append "mirror://sourceforge/espeak/espeak/"
  61                            "espeak-" (version-major+minor version)
  62                            "/espeak-" version "-source.zip"))
  63        (sha256
  64         (base32 "0n86gwh9pw0jqqpdz7mxggllfr8k0r7pc67ayy7w5z6z79kig6mz"))
  65        (modules '((guix build utils)))
  66        ;; The source archive ships prebuilt binaries; get rid of them.
  67        (snippet
  68         '(begin
  69            (delete-file-recursively "linux_32bit")
  70            #t))))
  71     (build-system gnu-build-system)
  72     (arguments
  73      `(#:tests? #f                      ; no check target
  74        #:make-flags
  75        (let ((out (assoc-ref %outputs "out")))
  76          (list (string-append "PREFIX=" out)
  77                (string-append "DATADIR=" out "/share/espeak-data")
  78                (string-append "LDFLAGS=-Wl,-rpath=" out "/lib")
  79                ;; Newer C++ standards make the build fail.
  80                "CXXFLAGS=-std=c++98"
  81                "AUDIO=pulseaudio"))
  82        #:phases
  83        (modify-phases %standard-phases
  84          (replace 'configure
  85            (lambda _
  86              ;; Everything below happens in the "src" sub-directory.
  87              (chdir "src")
  88              ;; PortAudio is at version 19 here, so its matching header has
  89              ;; to be copied into place for espeak to compile correctly.
  90              (copy-file "portaudio19.h" "portaudio.h")
  91              ;; Look up 'ln' via PATH instead of the absolute "/bin/ln".
  92              (substitute* "Makefile"
  93                (("/bin/ln") "ln"))
  94              #t)))))
  95     (native-inputs `(("unzip" ,unzip)))
  96     (inputs
  97      `(("portaudio" ,portaudio)
  98        ("pulseaudio" ,pulseaudio)))
  99     (home-page "http://espeak.sourceforge.net/")
 100     (synopsis "Software speech synthesizer")
 101     (description "eSpeak is a software speech synthesizer for English and
 102 other languages.  eSpeak uses a \"formant synthesis\" method.  This allows many
 103 languages to be provided in a small size.  The speech is clear, and can be used
 104 at high speeds, but is not as natural or smooth as larger synthesizers which are
 105 based on human speech recordings.")
 106     (license license:gpl3+)))
 107
 108 (define-public espeak-ng
 109   (package
 110     (name "espeak-ng")
 111     (version "1.50")
 112     (home-page "https://github.com/espeak-ng/espeak-ng")
 113     ;; Upstream publishes release tarballs, but the one for 1.50 is broken, see
 114     ;; <https://github.com/espeak-ng/espeak-ng/issues/683>.  Fetch the raw Git
 115     ;; repository instead; drop 'native-inputs' when returning to the tarball.
 116     (source
 117      (origin
 118        (method git-fetch)
 119        (uri (git-reference (url home-page) (commit version)))
 120        (file-name (git-file-name name version))
 121        (sha256
 122         (base32 "0jkqhf2h94vbqq7mg7mmm23bq372fa7mdk941my18c3vkldcir1b"))))
 123     (build-system gnu-build-system)
 124     (arguments
 125      `(;; XXX: An audio device is needed by some of the tests.
 126        #:tests? #f
 127        ;; A race condition was seen in parallel builds as of 1.49.2.
 128        #:parallel-build? #f
 129        #:configure-flags '("--disable-static")))
 130     (inputs
 131      `(("libcap" ,libcap)
 132        ("pcaudiolib" ,pcaudiolib)))
 133     (native-inputs
 134      `(("autoconf" ,autoconf)
 135        ("automake" ,automake)
 136        ("libtool" ,libtool)
 137        ("which" ,which)))
 138     (synopsis "Software speech synthesizer")
 139     (description
 140      "eSpeak NG is a software speech synthesizer for more than 100 languages.
 141 It is based on the eSpeak engine and supports spectral and Klatt formant
 142 synthesis, and the ability to use MBROLA voices.")
 143     (license license:gpl3+)))
 144
 145 (define-public mitlm
 146   (package
 147     (name "mitlm")
 148     (version "0.4.2")
 149     (source
 150      (origin
 151        (method url-fetch)
 152        ;; Spell out the project name rather than reuse the 'name' field, so
 153        ;; that the URL is unaffected should the package ever be renamed.
 154        (uri (string-append "https://github.com/mitlm/mitlm/releases/download/v"
 155                            version "/mitlm-" version ".tar.xz"))
 156        (sha256
 157         (base32 "09fv4fcpmw9g1j0zml0k5kk1lgjw2spr8gn51llbkaaph6v8d62a"))))
 158     (build-system gnu-build-system)
 159     (native-inputs `(("gfortran" ,gfortran)))
 160     (synopsis "MIT Language Modeling toolkit") ; no leading article
 161     (description "The MIT Language Modeling (MITLM) toolkit is a set of
 162 tools designed for the efficient estimation of statistical n-gram language
 163 models involving iterative parameter estimation.  It achieves much of its
 164 efficiency through the use of a compact vector representation of n-grams.")
 165     (home-page "https://github.com/mitlm/mitlm")
 166     (license license:expat)))
 167
 168 (define-public speech-dispatcher
 169   (package
 170     (name "speech-dispatcher")
 171     (version "0.9.1")
 172     (source
 173      (origin
 174        (method url-fetch)
 175        (uri (string-append "https://github.com/brailcom/speechd/releases"
 176                            "/download/" version "/speech-dispatcher-"
 177                            version ".tar.gz"))
 178        (sha256
 179         (base32 "16bg52hnkrsrs7kgbzanb34b9zb6fqxwj0a9bmsxmj1skkil1h1p"))))
 180     (build-system gnu-build-system)
 181     (arguments
 182      `(#:configure-flags
 183        '("--disable-static"
 184          ;; Turn off support for proprietary TTS engines.
 185          "--with-ibmtts=no" "--with-kali=no" "--with-baratinoo=no")))
 186     (inputs
 187      `(("dotconf" ,dotconf)
 188        ("espeak" ,espeak-ng)
 189        ("glib" ,glib)
 190        ("libltdl" ,libltdl)
 191        ("libsndfile" ,libsndfile)
 192        ("pulseaudio" ,pulseaudio)
 193        ("python" ,python)))
 194     (native-inputs
 195      `(("intltool" ,intltool)
 196        ("pkg-config" ,pkg-config)))
 197     (home-page "https://devel.freebsoft.org/speechd")
 198     (synopsis "Common interface to speech synthesizers")
 199     (description "The Speech Dispatcher project provides a high-level
 200 device independent layer for access to speech synthesis through a simple,
 201 stable and well documented interface.")
 202     ;; GPL2+ is the license the software is distributed under, though a
 203     ;; number of the files it includes are covered by other licenses.  Note:
 204     ;; in practice, this is linked against dotconf, which is LGPL 2.1 only.
 205     (license
 206      (list license:gpl2+
 207            license:fdl1.2+      ; most files in doc/ are dual gpl2+/fdl1.2+
 208            license:lgpl2.1+
 209            ;; festival_client.{c,h} carries an expat-style license.
 210            (license:non-copyleft
 211             "See src/modules/festival_client.c in the distribution.")
 212            license:gpl3+))))    ; doc/texinfo.tex -- with TeX exception
 213
 214 (define-public sonic
 215   (package
 216     (name "sonic")
 217     (version "0.2.0")
 218     (source
 219      (origin
 220        (method git-fetch)
 221        (uri (git-reference
 222              (url "https://github.com/waywardgeek/sonic")
 223              (commit (string-append "release-" version))))
 224        (file-name (git-file-name name version))
 225        (sha256
 226         (base32 "08xwnpw9cnaix1n1i7gvpq5hrfrqc2z1snjhjapfam506hrc77g4"))))
 227     (build-system gnu-build-system)
 228     (arguments
 229      `(#:make-flags
 230        (let ((out (assoc-ref %outputs "out")))
 231          (list (string-append "PREFIX=" out)
 232                (string-append "LDFLAGS=-Wl,-rpath=" out "/lib")))
 233        #:tests? #f                      ; there is no test suite
 234        #:phases
 235        (modify-phases %standard-phases
 236          (delete 'configure)            ; there is no ./configure script
 237          ;; Have the Makefile pass $(LDFLAGS) wherever it says " -o sonic ".
 238          (add-after 'unpack 'respect-LDFLAGS
 239            (lambda _
 240              (substitute* "Makefile"
 241                ((" -o sonic " all) (string-append " $(LDFLAGS)" all)))
 242              #t)))))
 243     (home-page "https://github.com/waywardgeek/sonic")
 244     (synopsis "Speed up or slow down speech")
 245     (description "Sonic implements a simple algorithm for speeding up or slowing
 246 down speech.  However, it's optimized for speed ups of over 2X, unlike previous
 247 algorithms for changing speech rate.  Sonic is a C library designed to be easily
 248 integrated into streaming voice applications such as text-to-speech (TTS) back
 249 ends.
 250
 251 The primary motivation behind Sonic is to enable the blind and visually impaired
 252 to improve their productivity with speech engines, like eSpeak.  Sonic can also
 253 be used by the sighted.")
 254     (license license:asl2.0)))
 255
 256 (define-public festival
 257   (package
 258     (name "festival")
 259     (version "2.5.0")
 260     (source (origin
 261               (method url-fetch)
 262               (uri (string-append "http://festvox.org/packed/festival/"
 263                                   (version-major+minor version)
 264                                   "/festival-" version "-release.tar.gz"))
 265               (sha256
 266                (base32
 267                 "1d5415nckiv19adxisxfm1l1xxfyw88g87ckkmcr0lhjdd10g42c"))))
 268     (build-system gnu-build-system)
 269     (arguments
 270      `(#:tests? #f ; there is no test target
 271        #:make-flags ; 'rm' and 'printf' come from the "coreutils" input
 272        (list (string-append "RM="
 273                             (assoc-ref %build-inputs "coreutils")
 274                             "/bin/rm")
 275              (string-append "ECHO_N="
 276                             (assoc-ref %build-inputs "coreutils")
 277                             "/bin/printf \"%s\""))
 278        #:parallel-build? #f ; not supported
 279        #:modules ((guix build gnu-build-system)
 280                   (guix build utils)
 281                   (guix build emacs-utils)) ; for 'emacs-generate-autoloads'
 282        #:imported-modules (,@%gnu-build-system-modules
 283                            (guix build emacs-utils))
 284        #:phases
 285        (modify-phases %standard-phases
 286          (add-after 'unpack 'unpack-and-patch-speech-tools
 287            (lambda* (#:key inputs #:allow-other-keys)
 288              (invoke "tar" "-C" ".." ; extract into the parent directory
 289                      "-xf" (assoc-ref inputs "speech-tools"))
 290              (with-directory-excursion "../speech_tools"
 291                (substitute* '("config/rules/modules.mak"
 292                               "config/rules/test_make_rules.mak"
 293                               "config/make_system.mak")
 294                  (("/bin/sh") (which "sh"))))
 295              #t))
 296          (add-after 'unpack 'patch-/bin/sh
 297            (lambda _
 298              (substitute* '("config/test_make_rules"
 299                             "config/make_system.mak")
 300                (("/bin/sh") (which "sh")))
 301              #t))
 302          (add-before 'build 'build-speech-tools
 303            (lambda* (#:key configure-flags make-flags #:allow-other-keys)
 304              (with-directory-excursion "../speech_tools"
 305                (apply invoke "sh" "configure"
 306                       (string-append "CONFIG_SHELL=" (which "sh"))
 307                       (string-append "SHELL=" (which "sh"))
 308                       configure-flags)
 309                (apply invoke "make" make-flags)))) ; this phase's make flags
 310          (add-after 'build 'build-documentation
 311            (lambda _
 312              (with-directory-excursion "doc"
 313                (invoke "make" "festival.info"))))
 314          (add-after 'unpack 'set-installation-directories
 315            (lambda* (#:key outputs #:allow-other-keys)
 316              (let ((out (assoc-ref outputs "out")))
 317                (substitute* "config/project.mak"
 318                  (("^FTLIBDIR.*")
 319                   (string-append "FTLIBDIR=" out "/share/festival/lib")))
 320                (substitute* "config/systems/default.mak"
 321                  (("^INSTALL_PREFIX.*")
 322                   (string-append "INSTALL_PREFIX=" out)))
 323                #t)))
 324          (add-after 'install 'actually-install
 325            (lambda* (#:key inputs outputs #:allow-other-keys)
 326              (let ((out (assoc-ref outputs "out")))
 327                ;; Install Speech Tools first, copying its files by hand.
 328                (with-directory-excursion "../speech_tools"
 329                  ;; Create the target directories.
 330                  (for-each (lambda (dir)
 331                              (mkdir-p (string-append out dir)))
 332                            '("/bin"
 333                              "/lib"
 334                              "/include/speech_tools/"
 335                              "/include/speech_tools/instantiate"
 336                              "/include/speech_tools/ling_class"
 337                              "/include/speech_tools/rxp"
 338                              "/include/speech_tools/sigpr"
 339                              "/include/speech_tools/unix"))
 340                  ;; Install binaries, then delete three of them again.
 341                  (for-each (lambda (file)
 342                              (install-file file (string-append out "/bin")))
 343                            (find-files "bin" ".*"))
 344                  (for-each (lambda (file)
 345                              (delete-file (string-append out "/bin/" file)))
 346                            '("est_gdb" "est_examples" "est_program"))
 347                  ;; Install the shared libraries.
 348                  (for-each (lambda (file)
 349                              (install-file file (string-append out "/lib")))
 350                            (find-files "lib" "lib.*\\.so.*"))
 351
 352                  ;; Install headers, one include sub-directory at a time.
 353                  (for-each
 354                   (lambda (dir)
 355                     (for-each
 356                      (lambda (header)
 357                        (install-file header
 358                                      (string-append out "/include/speech_tools/" dir)))
 359                      (find-files (string-append "include/" dir)
 360                                  "\\.h$")))
 361                   '("." "instantiate" "ling_class" "rxp" "sigpr" "unix")))
 362
 363                ;; Unpack the extra tarballs (festvox-cmu, festvox-poslex and
 364                ;; default-voice) to be installed with the Festival libraries.
 365                (invoke "tar" "--strip-components=1"
 366                        "-xvf" (assoc-ref inputs "festvox-cmu"))
 367                (invoke "tar" "--strip-components=1"
 368                        "-xvf" (assoc-ref inputs "festvox-poslex"))
 369                (invoke "tar" "--strip-components=1"
 370                        "-xvf" (assoc-ref inputs "default-voice"))
 371
 372                ;; Install Festival itself.
 373                (let ((bin (string-append out "/bin"))
 374                      (incdir (string-append out "/include/festival"))
 375                      (share (string-append out "/share/festival"))
 376                      (info (string-append out "/share/info")))
 377                  (for-each (lambda (executable)
 378                              (install-file executable bin))
 379                            '("src/main/festival"
 380                              "src/main/festival_client"
 381                              "examples/benchmark"))
 382                  (let ((scripts '("examples/dumpfeats"
 383                                   "examples/durmeanstd"
 384                                   "examples/latest"
 385                                   "examples/make_utts"
 386                                   "examples/powmeanstd"
 387                                   "examples/run-festival-script"
 388                                   "examples/saytime"
 389                                   "examples/scfg_parse_text"
 390                                   "examples/text2pos"
 391                                   "examples/text2wave")))
 392                    (substitute* scripts ; use the installed 'festival' binary
 393                      (("exec /tmp/guix-build.*/bin/festival")
 394                       (string-append "exec " bin "/festival")))
 395                    (for-each (lambda (script)
 396                                (install-file script bin))
 397                              scripts))
 398
 399                  ;; Documentation (the Info files found under doc/info/).
 400                  (for-each (lambda (file)
 401                              (install-file file info))
 402                            (find-files "doc/info/" "festival.info.*"))
 403
 404                  ;; Headers, keeping each file's relative directory.
 405                  (mkdir-p incdir)
 406                  (for-each (lambda (header)
 407                              (install-file header
 408                                            (string-append incdir "/"
 409                                                           (dirname header))))
 410                            (find-files "src/include" "\\.h$"))
 411
 412                  ;; Data: everything under lib/, relative directories kept.
 413                  (mkdir-p share)
 414                  (for-each (lambda (file)
 415                              (install-file file
 416                                            (string-append share "/"
 417                                                           (dirname file))))
 418                            (find-files "lib" ".*"))
 419                  (for-each delete-file ; Makefiles that were copied along
 420                            (append (find-files share "Makefile")
 421                                    (find-files bin "Makefile")))))
 422              #t))
 423          (add-after 'actually-install 'install-emacs-mode
 424            (lambda* (#:key outputs #:allow-other-keys)
 425              (let ((emacs-dir (string-append (assoc-ref outputs "out")
 426                                              "/share/emacs/site-lisp")))
 427                (install-file "lib/festival.el" emacs-dir)
 428                (emacs-generate-autoloads ,name emacs-dir)
 429                #t)))
 430          ;; Rebuild the very old configure script that is confused by extra
 431          ;; arguments.
 432          (add-before 'configure 'bootstrap
 433            (lambda _ (invoke "autoreconf" "-vif"))))))
 434     (inputs
 435      `(("ncurses" ,ncurses)))
 436     (native-inputs
 437      `(("autoconf" ,autoconf)
 438        ("automake" ,automake)
 439        ("texinfo" ,texinfo)
 440        ("emacs" ,emacs-minimal)
 441        ("festvox-cmu"
 442         ,(origin
 443            (method url-fetch)
 444            (uri (string-append "http://festvox.org/packed/festival/"
 445                                (version-major+minor version)
 446                                "/festlex_CMU.tar.gz"))
 447            (sha256
 448             (base32
 449              "01vwidqhhg2zifvk1gby91mckg1z2pv2mj6lihvdaifakf8k1561"))))
 450        ("festvox-poslex"
 451         ,(origin
 452            (method url-fetch)
 453            (uri (string-append "http://festvox.org/packed/festival/"
 454                                (version-major+minor version)
 455                                "/festlex_POSLEX.tar.gz"))
 456            (sha256
 457             (base32
 458              "18wywilxaqwy63lc47p5g5529mpxhslibh1bjij0snxx5mjf7ip7"))))
 459        ("default-voice"
 460         ,(origin
 461            (method url-fetch)
 462            (uri (string-append "http://festvox.org/packed/festival/"
 463                                (version-major+minor version)
 464                                "/voices/festvox_kallpc16k.tar.gz"))
 465            (sha256
 466             (base32
 467              "136hmsyiwnlg2qwa508dy0imf19mzrb5r3dmb2kg8kcyxnslm740"))))
 468        ("speech-tools"
 469         ,(origin
 470            (method url-fetch)
 471            (uri (string-append "http://festvox.org/packed/festival/"
 472                                (version-major+minor version)
 473                                "/speech_tools-" version "-release.tar.gz"))
 474            (sha256
 475             (base32
 476              "1k2xh13miyv48gh06rgsq2vj25xwj7z6vwq9ilsn8i7ig3nrgzg4"))))))
 477     (home-page "http://www.cstr.ed.ac.uk/projects/festival/")
 478     (synopsis "Speech synthesis system")
 479     (description "Festival offers a general framework for building speech
 480 synthesis systems as well as including examples of various modules.  As a
 481 whole it offers full text to speech through a number of APIs: from shell level,
 482 through a Scheme command interpreter, as a C++ library, from Java, and an Emacs
 483 interface.  Festival is multi-lingual though English is the most advanced.
 484 The system is written in C++ and uses the Edinburgh Speech Tools Library for
 485 low level architecture and has a Scheme (SIOD) based command interpreter for
 486 control.")
 487     (license (license:non-copyleft "file://COPYING"))))
 488
 489 (define-public sphinxbase
 490   (package
 491     (name "sphinxbase")
 492     (version "5prealpha")
 493     (source (origin
 494               (method url-fetch)
 495               (uri (string-append
 496                     "mirror://sourceforge/cmusphinx/sphinxbase/"
 497                     version "/sphinxbase-" version ".tar.gz"))
 498               (sha256
 499                (base32
 500                 "0vr4k8pv5a8nvq9yja7kl13b5lh0f9vha8fc8znqnm8bwmcxnazp"))))
 501     (build-system gnu-build-system)
 502     ;; The tests fail unless they are run sequentially.
 503     (arguments `(#:parallel-tests? #f))
 504     (inputs
 505      `(("pulseaudio" ,pulseaudio)))
 506     (native-inputs
 507      `(("bison" ,bison)
 508        ("doxygen" ,doxygen)
 509        ("perl" ,perl)                   ; used by the tests
 510        ("python" ,python)
 511        ("swig" ,swig)))
 512     (synopsis "Support library required by Pocketsphinx and Sphinxtrain")
 513     (description "This package contains the basic libraries shared by
 514 the CMU Sphinx trainer and all the Sphinx decoders (Sphinx-II,
 515 Sphinx-III, and PocketSphinx), as well as some common utilities for
 516 manipulating acoustic feature and audio files.")
 517     (home-page "https://cmusphinx.github.io/")
 518     (license license:bsd-4)))
 519
 520 (define-public pocketsphinx
 521   (package
 522     (name "pocketsphinx")
 523     (version "5prealpha")
 524     (source
 525      (origin
 526        (method url-fetch)
 527        (uri (string-append "mirror://sourceforge/cmusphinx/"
 528                            "pocketsphinx/" version "/"
 529                            "pocketsphinx-" version ".tar.gz"))
 530        (sha256
 531         (base32 "1n9yazzdgvpqgnfzsbl96ch9cirayh74jmpjf7svs4i7grabanzg"))))
 532     (build-system gnu-build-system)
 533     (native-inputs
 534      `(("pkg-config" ,pkg-config)
 535        ("perl" ,perl)                   ;for tests
 536        ("python" ,python)
 537        ("swig" ,swig)))
 538     (inputs
 539      `(("gstreamer" ,gstreamer)
 540        ("libcap" ,libcap)
 541        ("pulseaudio" ,pulseaudio)
 542        ("sphinxbase" ,sphinxbase)))     ;the support library defined above
 543     (home-page "https://cmusphinx.github.io/")
 544     (synopsis "Recognizer library written in C")
 545     (description "PocketSphinx is one of Carnegie Mellon University's
 546 large vocabulary, speaker-independent continuous speech recognition
 547 engines.")
 548     (license license:bsd-2)))