gnu/packages/speech.scm

   1 ;;; GNU Guix --- Functional package management for GNU
   2 ;;; Copyright © 2016 David Thompson <davet@gnu.org>
   3 ;;; Copyright © 2016, 2019, 2020 Marius Bakke <mbakke@fastmail.com>
   4 ;;; Copyright © 2017 Leo Famulari <leo@famulari.name>
   5 ;;; Copyright © 2018, 2020 Tobias Geerinckx-Rice <me@tobias.gr>
   6 ;;; Copyright © 2016 Kei Kebreau <kkebreau@posteo.net>
   7 ;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
   8 ;;; Copyright © 2020 Nicolas Goaziou <mail@nicolasgoaziou.fr>
   9 ;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
  10 ;;;
  11 ;;; This file is part of GNU Guix.
  12 ;;;
  13 ;;; GNU Guix is free software; you can redistribute it and/or modify it
  14 ;;; under the terms of the GNU General Public License as published by
  15 ;;; the Free Software Foundation; either version 3 of the License, or (at
  16 ;;; your option) any later version.
  17 ;;;
  18 ;;; GNU Guix is distributed in the hope that it will be useful, but
  19 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  20 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 ;;; GNU General Public License for more details.
  22 ;;;
  23 ;;; You should have received a copy of the GNU General Public License
  24 ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
  25
  26 (define-module (gnu packages speech)
  27   #:use-module ((guix licenses) #:prefix license:)
  28   #:use-module (guix packages)
  29   #:use-module (guix download)
  30   #:use-module (guix git-download)
  31   #:use-module (guix utils)
  32   #:use-module (guix build-system gnu)
  33   #:use-module (gnu packages)
  34   #:use-module (gnu packages audio)
  35   #:use-module (gnu packages autotools)
  36   #:use-module (gnu packages base)            ;for 'which'
  37   #:use-module (gnu packages bison)
  38   #:use-module (gnu packages compression)
  39   #:use-module (gnu packages documentation)
  40   #:use-module (gnu packages emacs)
  41   #:use-module (gnu packages gcc)
  42   #:use-module (gnu packages glib)
  43   #:use-module (gnu packages gstreamer)
  44   #:use-module (gnu packages linux)
  45   #:use-module (gnu packages ncurses)
  46   #:use-module (gnu packages perl)
  47   #:use-module (gnu packages pkg-config)
  48   #:use-module (gnu packages pulseaudio)
  49   #:use-module (gnu packages python)
  50   #:use-module (gnu packages swig)
  51   #:use-module (gnu packages texinfo)
  52   #:use-module (gnu packages textutils))
  53
  54 (define-public espeak
  55   (package
  56     (name "espeak")
  57     (version "1.48.04")
  58     (source (origin
  59               (method url-fetch)
  60               (uri (string-append "mirror://sourceforge/espeak/espeak/"
  61                                   "espeak-" (version-major+minor version)
  62                                   "/espeak-" version "-source.zip"))
  63               (sha256
  64                (base32
  65                 "0n86gwh9pw0jqqpdz7mxggllfr8k0r7pc67ayy7w5z6z79kig6mz"))
  66               (modules '((guix build utils)))
  67               (snippet
  68                ;; remove prebuilt binaries
  69                '(begin
  70                   (delete-file-recursively "linux_32bit")
  71                   #t))))
  72     (build-system gnu-build-system)
  73     (arguments
  74      `(#:make-flags (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
  75                           (string-append "DATADIR="
  76                                          (assoc-ref %outputs "out")
  77                                          "/share/espeak-data")
  78                           (string-append "LDFLAGS=-Wl,-rpath="
  79                                          (assoc-ref %outputs "out")
  80                                          "/lib")
  81                           ;; The package fails to build with newer C++ standards.
  82                           "CXXFLAGS=-std=c++98"
  83                           "AUDIO=pulseaudio")
  84        #:tests? #f ; no check target
  85        #:phases
  86        (modify-phases %standard-phases
  87          (replace 'configure
  88            (lambda _
  89              (chdir "src")
  90              ;; We use version 19 of the PortAudio library, so we must copy the
  91              ;; corresponding file to be sure that espeak compiles correctly.
  92              (copy-file "portaudio19.h" "portaudio.h")
  93              (substitute* "Makefile"
  94                (("/bin/ln") "ln"))
  95              #t)))))
  96        (inputs
  97         `(("portaudio" ,portaudio)
  98           ("pulseaudio" ,pulseaudio)))
  99        (native-inputs `(("unzip" ,unzip)))
 100        (home-page "http://espeak.sourceforge.net/")
 101        (synopsis "Software speech synthesizer")
 102        (description "eSpeak is a software speech synthesizer for English and
 103 other languages.  eSpeak uses a \"formant synthesis\" method.  This allows many
 104 languages to be provided in a small size.  The speech is clear, and can be used
 105 at high speeds, but is not as natural or smooth as larger synthesizers which are
 106 based on human speech recordings.")
 107        (license license:gpl3+)))
 108
 109 (define-public espeak-ng
 110   (package
 111     (name "espeak-ng")
 112     (version "1.50")
 113     (home-page "https://github.com/espeak-ng/espeak-ng")
 114     ;; Note: eSpeak NG publishes release tarballs, but the 1.50 tarball is
 115     ;; broken: <https://github.com/espeak-ng/espeak-ng/issues/683>.
 116     ;; Download the raw repository to work around it; remove 'native-inputs'
 117     ;; below when switching back to the release tarball.
 118     (source (origin
 119               (method git-fetch)
 120               (uri (git-reference (url home-page) (commit version)))
 121               (file-name (git-file-name name version))
 122               (sha256
 123                (base32 "0jkqhf2h94vbqq7mg7mmm23bq372fa7mdk941my18c3vkldcir1b"))))
 124     (build-system gnu-build-system)
 125     (arguments
 126      `(#:configure-flags '("--disable-static")
 127        ;; Building in parallel triggers a race condition in 1.49.2.
 128        #:parallel-build? #f
 129        ;; XXX: Some tests require an audio device.
 130        #:tests? #f))
 131     (native-inputs
 132      `(("autoconf" ,autoconf)
 133        ("automake" ,automake)
 134        ("libtool" ,libtool)
 135        ("which" ,which)))
 136     (inputs
 137      `(("libcap" ,libcap)
 138        ("pcaudiolib" ,pcaudiolib)))
 139     (synopsis "Software speech synthesizer")
 140     (description
 141      "eSpeak NG is a software speech synthesizer for more than 100 languages.
 142 It is based on the eSpeak engine and supports spectral and Klatt formant
 143 synthesis, and the ability to use MBROLA voices.")
 144     (license license:gpl3+)))
 145
 146 (define-public mitlm
 147   (package
 148     (name "mitlm")
 149     (version "0.4.2")
 150     (source (origin
 151               (method url-fetch)
 152               (uri (string-append "https://github.com/mitlm/mitlm/releases/"
 153                                   "download/v" version "/"
 154                                   name "-" version ".tar.xz"))
 155               (sha256
 156                (base32
 157                 "09fv4fcpmw9g1j0zml0k5kk1lgjw2spr8gn51llbkaaph6v8d62a"))))
 158     (build-system gnu-build-system)
 159     (native-inputs
 160      `(("gfortran" ,gfortran)))
 161     (synopsis "The MIT Language Modeling toolkit")
 162     (description "The MIT Language Modeling (MITLM) toolkit is a set of
 163 tools designed for the efficient estimation of statistical n-gram language
 164 models involving iterative parameter estimation.  It achieves much of its
 165 efficiency through the use of a compact vector representation of n-grams.")
 166     (home-page "https://github.com/mitlm/mitlm")
 167     (license license:expat)))
 168
 169 (define-public speech-dispatcher
 170   (package
 171     (name "speech-dispatcher")
 172     (version "0.9.1")
 173     (source (origin
 174               (method url-fetch)
 175               (uri (string-append "https://github.com/brailcom/speechd/releases"
 176                                   "/download/" version "/speech-dispatcher-"
 177                                   version ".tar.gz"))
 178               (sha256
 179                (base32
 180                 "16bg52hnkrsrs7kgbzanb34b9zb6fqxwj0a9bmsxmj1skkil1h1p"))))
 181     (build-system gnu-build-system)
 182     (arguments
 183      `(#:configure-flags '("--disable-static"
 184
 185                            ;; Disable support for proprietary TTS engines.
 186                            "--with-ibmtts=no"
 187                            "--with-kali=no" "--with-baratinoo=no")))
 188     (native-inputs
 189      `(("intltool" ,intltool)
 190        ("pkg-config" ,pkg-config)))
 191     (inputs
 192      `(("dotconf" ,dotconf)
 193        ("espeak" ,espeak-ng)
 194        ("glib" ,glib)
 195        ("libltdl" ,libltdl)
 196        ("libsndfile" ,libsndfile)
 197        ("pulseaudio" ,pulseaudio)
 198        ("python" ,python)))
 199     (synopsis "Common interface to speech synthesizers")
 200     (description "The Speech Dispatcher project provides a high-level
 201 device independent layer for access to speech synthesis through a simple,
 202 stable and well documented interface.")
 203     (home-page "https://devel.freebsoft.org/speechd")
 204     ;; The software is distributed under GPL2+, but includes a number
 205     ;; of files covered by other licenses.  Note: in practice, this
 206     ;; is linked against dotconf, which is LGPL 2.1 only.
 207     (license (list license:gpl2+
 208                    license:fdl1.2+ ; Most files in doc/ are dual gpl2+/fdl1.2+.
 209                    license:lgpl2.1+
 210                    (license:non-copyleft
 211                     ;; festival_client.{c,h} carries an expat-style license.
 212                     "See src/modules/festival_client.c in the distribution.")
 213                    license:gpl3+)))) ; doc/texinfo.tex -- with TeX exception.
 214
 215 (define-public sonic
 216   (package
 217     (name "sonic")
 218     (version "0.2.0")
 219     (source (origin
 220              (method git-fetch)
 221              (uri (git-reference
 222                     (url "https://github.com/waywardgeek/sonic")
 223                     (commit (string-append "release-" version))))
 224              (file-name (git-file-name name version))
 225              (sha256
 226               (base32
 227                "08xwnpw9cnaix1n1i7gvpq5hrfrqc2z1snjhjapfam506hrc77g4"))))
 228     (build-system gnu-build-system)
 229     (arguments
 230      `(#:tests? #f                      ; no test suite
 231        #:make-flags
 232        (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
 233              (string-append "LDFLAGS=-Wl,-rpath="
 234                             (assoc-ref %outputs "out") "/lib"))
 235        #:phases
 236        (modify-phases %standard-phases
 237          (add-after 'unpack 'respect-LDFLAGS
 238            (lambda _
 239              (substitute* "Makefile"
 240                ((" -o sonic " match)
 241                 (string-append " $(LDFLAGS)" match)))
 242              #t))
 243          (delete 'configure))))        ; no ./configure script
 244     (synopsis "Speed up or slow down speech")
 245     (description "Sonic implements a simple algorithm for speeding up or slowing
 246 down speech.  However, it's optimized for speed ups of over 2X, unlike previous
 247 algorithms for changing speech rate.  Sonic is a C library designed to be easily
 248 integrated into streaming voice applications such as text-to-speech (TTS) back
 249 ends.
 250
 251 The primary motivation behind Sonic is to enable the blind and visually impaired
 252 to improve their productivity with speech engines, like eSpeak.  Sonic can also
 253 be used by the sighted.")
 254     (home-page "https://github.com/waywardgeek/sonic")
 255     (license license:asl2.0)))
 256
 257 (define-public festival
 258   (package
 259     (name "festival")
 260     (version "2.5.0")
 261     (source (origin
 262               (method url-fetch)
 263               (uri (string-append "http://festvox.org/packed/festival/"
 264                                   (version-major+minor version)
 265                                   "/festival-" version "-release.tar.gz"))
 266               (sha256
 267                (base32
 268                 "1d5415nckiv19adxisxfm1l1xxfyw88g87ckkmcr0lhjdd10g42c"))))
 269     (build-system gnu-build-system)
 270     (arguments
 271      `(#:tests? #f ; there is no test target
 272        #:make-flags
 273        (list (string-append "RM="
 274                             (assoc-ref %build-inputs "coreutils")
 275                             "/bin/rm")
 276              (string-append "ECHO_N="
 277                             (assoc-ref %build-inputs "coreutils")
 278                             "/bin/printf \"%s\""))
 279        #:parallel-build? #f ; not supported
 280        #:modules ((guix build gnu-build-system)
 281                   (guix build utils)
 282                   (guix build emacs-utils))
 283        #:imported-modules (,@%gnu-build-system-modules
 284                            (guix build emacs-utils))
 285        #:phases
 286        (modify-phases %standard-phases
 287          (add-after 'unpack 'unpack-and-patch-speech-tools
 288            (lambda* (#:key inputs #:allow-other-keys)
 289              (invoke "tar" "-C" ".."
 290                      "-xf" (assoc-ref inputs "speech-tools"))
 291              (with-directory-excursion "../speech_tools"
 292                (substitute* '("config/rules/modules.mak"
 293                               "config/rules/test_make_rules.mak"
 294                               "config/make_system.mak")
 295                  (("/bin/sh") (which "sh"))))
 296              #t))
 297          (add-after 'unpack 'patch-/bin/sh
 298            (lambda _
 299              (substitute* '("config/test_make_rules"
 300                             "config/make_system.mak")
 301                (("/bin/sh") (which "sh")))
 302              #t))
 303          (add-before 'build 'build-speech-tools
 304            (lambda* (#:key configure-flags make-flags #:allow-other-keys)
 305              (with-directory-excursion "../speech_tools"
 306                (apply invoke "sh" "configure"
 307                       (string-append "CONFIG_SHELL=" (which "sh"))
 308                       (string-append "SHELL=" (which "sh"))
 309                       configure-flags)
 310                (apply invoke "make" make-flags))))
 311          (add-after 'build 'build-documentation
 312            (lambda _
 313              (with-directory-excursion "doc"
 314                (invoke "make" "festival.info"))))
 315          (add-after 'unpack 'set-installation-directories
 316            (lambda* (#:key outputs #:allow-other-keys)
 317              (let ((out (assoc-ref outputs "out")))
 318                (substitute* "config/project.mak"
 319                  (("^FTLIBDIR.*")
 320                   (string-append "FTLIBDIR=" out "/share/festival/lib")))
 321                (substitute* "config/systems/default.mak"
 322                  (("^INSTALL_PREFIX.*")
 323                   (string-append "INSTALL_PREFIX=" out)))
 324                #t)))
 325          (add-after 'install 'actually-install
 326            (lambda* (#:key inputs outputs #:allow-other-keys)
 327              (let ((out (assoc-ref outputs "out")))
 328                ;; Install Speech Tools first
 329                (with-directory-excursion "../speech_tools"
 330                  ;; Target directories
 331                  (for-each (lambda (dir)
 332                              (mkdir-p (string-append out dir)))
 333                            '("/bin"
 334                              "/lib"
 335                              "/include/speech_tools/"
 336                              "/include/speech_tools/instantiate"
 337                              "/include/speech_tools/ling_class"
 338                              "/include/speech_tools/rxp"
 339                              "/include/speech_tools/sigpr"
 340                              "/include/speech_tools/unix"))
 341                  ;; Install binaries
 342                  (for-each (lambda (file)
 343                              (install-file file (string-append out "/bin")))
 344                            (find-files "bin" ".*"))
 345                  (for-each (lambda (file)
 346                              (delete-file (string-append out "/bin/" file)))
 347                            '("est_gdb" "est_examples" "est_program"))
 348                  ;; Install libraries
 349                  (for-each (lambda (file)
 350                              (install-file file (string-append out "/lib")))
 351                            (find-files "lib" "lib.*\\.so.*"))
 352
 353                  ;; Install headers
 354                  (for-each
 355                   (lambda (dir)
 356                     (for-each
 357                      (lambda (header)
 358                        (install-file header
 359                                      (string-append out "/include/speech_tools/" dir)))
 360                      (find-files (string-append "include/" dir)
 361                                  "\\.h$")))
 362                   '("." "instantiate" "ling_class" "rxp" "sigpr" "unix")))
 363
 364                ;; Unpack files that will be installed together with the
 365                ;; Festival libraries.
 366                (invoke "tar" "--strip-components=1"
 367                        "-xvf" (assoc-ref inputs "festvox-cmu"))
 368                (invoke "tar" "--strip-components=1"
 369                        "-xvf" (assoc-ref inputs "festvox-poslex"))
 370                (invoke "tar" "--strip-components=1"
 371                        "-xvf" (assoc-ref inputs "default-voice"))
 372
 373                ;; Install Festival
 374                (let ((bin (string-append out "/bin"))
 375                      (incdir (string-append out "/include/festival"))
 376                      (share (string-append out "/share/festival"))
 377                      (info (string-append out "/share/info")))
 378                  (for-each (lambda (executable)
 379                              (install-file executable bin))
 380                            '("src/main/festival"
 381                              "src/main/festival_client"
 382                              "examples/benchmark"))
 383                  (let ((scripts '("examples/dumpfeats"
 384                                   "examples/durmeanstd"
 385                                   "examples/latest"
 386                                   "examples/make_utts"
 387                                   "examples/powmeanstd"
 388                                   "examples/run-festival-script"
 389                                   "examples/saytime"
 390                                   "examples/scfg_parse_text"
 391                                   "examples/text2pos"
 392                                   "examples/text2wave")))
 393                    (substitute* scripts
 394                      (("exec /tmp/guix-build.*/bin/festival")
 395                       (string-append "exec " bin "/festival")))
 396                    (for-each (lambda (script)
 397                                (install-file script bin))
 398                              scripts))
 399
 400                  ;; Documentation
 401                  (for-each (lambda (file)
 402                              (install-file file info))
 403                            (find-files "doc/info/" "festival.info.*"))
 404
 405                  ;; Headers
 406                  (mkdir-p incdir)
 407                  (for-each (lambda (header)
 408                              (install-file header
 409                                            (string-append incdir "/"
 410                                                           (dirname header))))
 411                            (find-files "src/include" "\\.h$"))
 412
 413                  ;; Data
 414                  (mkdir-p share)
 415                  (for-each (lambda (file)
 416                              (install-file file
 417                                            (string-append share "/"
 418                                                           (dirname file))))
 419                            (find-files "lib" ".*"))
 420                  (for-each delete-file
 421                            (append (find-files share "Makefile")
 422                                    (find-files bin "Makefile")))))
 423              #t))
 424          (add-after 'actually-install 'install-emacs-mode
 425            (lambda* (#:key outputs #:allow-other-keys)
 426              (let ((emacs-dir (string-append (assoc-ref outputs "out")
 427                                              "/share/emacs/site-lisp")))
 428                (install-file "lib/festival.el" emacs-dir)
 429                (emacs-generate-autoloads ,name emacs-dir)
 430                #t)))
 431          ;; Rebuild the very old configure script that is confused by extra
 432          ;; arguments.
 433          (add-before 'configure 'bootstrap
 434            (lambda _ (invoke "autoreconf" "-vif"))))))
 435     (inputs
 436      `(("ncurses" ,ncurses)))
 437     (native-inputs
 438      `(("autoconf" ,autoconf)
 439        ("automake" ,automake)
 440        ("texinfo" ,texinfo)
 441        ("emacs" ,emacs-minimal)
 442        ("festvox-cmu"
 443         ,(origin
 444            (method url-fetch)
 445            (uri (string-append "http://festvox.org/packed/festival/"
 446                                (version-major+minor version)
 447                                "/festlex_CMU.tar.gz"))
 448            (sha256
 449             (base32
 450              "01vwidqhhg2zifvk1gby91mckg1z2pv2mj6lihvdaifakf8k1561"))))
 451        ("festvox-poslex"
 452         ,(origin
 453            (method url-fetch)
 454            (uri (string-append "http://festvox.org/packed/festival/"
 455                                (version-major+minor version)
 456                                "/festlex_POSLEX.tar.gz"))
 457            (sha256
 458             (base32
 459              "18wywilxaqwy63lc47p5g5529mpxhslibh1bjij0snxx5mjf7ip7"))))
 460        ("default-voice"
 461         ,(origin
 462            (method url-fetch)
 463            (uri (string-append "http://festvox.org/packed/festival/"
 464                                (version-major+minor version)
 465                                "/voices/festvox_kallpc16k.tar.gz"))
 466            (sha256
 467             (base32
 468              "136hmsyiwnlg2qwa508dy0imf19mzrb5r3dmb2kg8kcyxnslm740"))))
 469        ("speech-tools"
 470         ,(origin
 471            (method url-fetch)
 472            (uri (string-append "http://festvox.org/packed/festival/"
 473                                (version-major+minor version)
 474                                "/speech_tools-" version "-release.tar.gz"))
 475            (sha256
 476             (base32
 477              "1k2xh13miyv48gh06rgsq2vj25xwj7z6vwq9ilsn8i7ig3nrgzg4"))))))
 478     (home-page "http://www.cstr.ed.ac.uk/projects/festival/")
 479     (synopsis "Speech synthesis system")
 480     (description "Festival offers a general framework for building speech
 481 synthesis systems as well as including examples of various modules.  As a
 482 whole it offers full text to speech through a number APIs: from shell level,
 483 though a Scheme command interpreter, as a C++ library, from Java, and an Emacs
 484 interface.  Festival is multi-lingual though English is the most advanced.
 485 The system is written in C++ and uses the Edinburgh Speech Tools Library for
 486 low level architecture and has a Scheme (SIOD) based command interpreter for
 487 control.")
 488     (license (license:non-copyleft "file://COPYING"))))
 489
 490 (define-public sphinxbase
 491   (package
 492     (name "sphinxbase")
 493     (version "5prealpha")
 494     (source
 495      (origin
 496        (method url-fetch)
 497        (uri (string-append "mirror://sourceforge/cmusphinx/"
 498                            "sphinxbase/" version "/"
 499                            "sphinxbase-" version ".tar.gz"))
 500        (sha256
 501         (base32 "0vr4k8pv5a8nvq9yja7kl13b5lh0f9vha8fc8znqnm8bwmcxnazp"))
 502        (patches (search-patches "sphinxbase-fix-doxygen.patch"))))
 503     (build-system gnu-build-system)
 504     (arguments
 505      `(#:parallel-tests? #f))           ;tests fail otherwise
 506     (native-inputs
 507      `(("bison" ,bison)
 508        ("doxygen" ,doxygen)
 509        ("perl" ,perl)                   ;for tests
 510        ("python" ,python)
 511        ("swig" ,swig)))
 512     (inputs
 513      `(("pulseaudio" ,pulseaudio)))
 514     (home-page "https://cmusphinx.github.io/")
 515     (synopsis "Support library required by Pocketsphinx and Sphinxtrain")
 516     (description "This package contains the basic libraries shared by
 517 the CMU Sphinx trainer and all the Sphinx decoders (Sphinx-II,
 518 Sphinx-III, and PocketSphinx), as well as some common utilities for
 519 manipulating acoustic feature and audio files.")
 520     (license license:bsd-4)))
 521
 522 (define-public pocketsphinx
 523   (package
 524     (name "pocketsphinx")
 525     (version "5prealpha")
 526     (source
 527      (origin
 528        (method url-fetch)
 529        (uri (string-append "mirror://sourceforge/cmusphinx/"
 530                            "pocketsphinx/" version "/"
 531                            "pocketsphinx-" version ".tar.gz"))
 532        (sha256
 533         (base32 "1n9yazzdgvpqgnfzsbl96ch9cirayh74jmpjf7svs4i7grabanzg"))))
 534     (build-system gnu-build-system)
 535     (native-inputs
 536      `(("pkg-config" ,pkg-config)
 537        ("perl" ,perl)                   ;for tests
 538        ("python" ,python)
 539        ("swig" ,swig)))
 540     (inputs
 541      `(("gstreamer" ,gstreamer)
 542        ("libcap" ,libcap)
 543        ("pulseaudio" ,pulseaudio)
 544        ("sphinxbase" ,sphinxbase)))
 545     (home-page "https://cmusphinx.github.io/")
 546     (synopsis "Recognizer library written in C")
 547     (description "PocketSphinx is one of Carnegie Mellon University's
 548 large vocabulary, speaker-independent continuous speech recognition
 549 engine.")
 550     (license license:bsd-2)))