X-Git-Url: http://git.hcoop.net/jackhill/guix/guix.git/blobdiff_plain/cd6cc144e0822482a8ca2b033b7bd6d33f0fd331..f9c3bd2e0183b777ad3794674a360222c62633b0:/gnu/packages/textutils.scm diff --git a/gnu/packages/textutils.scm b/gnu/packages/textutils.scm index 73a0532478..674a3507d0 100644 --- a/gnu/packages/textutils.scm +++ b/gnu/packages/textutils.scm @@ -1,8 +1,18 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer -;;; Copyright © 2015, 2016 Ricardo Wurmus +;;; Copyright © 2015, 2016, 2017 Ricardo Wurmus ;;; Copyright © 2015, 2016 Ben Woodcroft ;;; Copyright © 2015 Roel Janssen +;;; Copyright © 2016 Jelle Licht +;;; Copyright © 2016 Alex Griffin +;;; Copyright © 2016 Efraim Flashner +;;; Copyright © 2016 ng0 +;;; Copyright © 2016 Marius Bakke +;;; Copyright © 2017 Eric Bavier +;;; Copyright © 2017 Rene Saavedra +;;; Copyright © 2017 Hartmut Goebel +;;; Copyright © 2017 Kei Kebreau +;;; Copyright © 2017 Alex Vong ;;; ;;; This file is part of GNU Guix. ;;; @@ -24,10 +34,50 @@ #:use-module (guix packages) #:use-module (guix download) #:use-module (guix git-download) + #:use-module (guix build-system ant) #:use-module (guix build-system gnu) #:use-module (guix build-system cmake) + #:use-module (guix build-system trivial) + #:use-module (guix build-system python) + #:use-module (gnu packages) #:use-module (gnu packages autotools) - #:use-module (gnu packages python)) + #:use-module (gnu packages compression) + #:use-module (gnu packages gettext) + #:use-module (gnu packages java) + #:use-module (gnu packages ncurses) + #:use-module (gnu packages perl) + #:use-module (gnu packages pkg-config) + #:use-module (gnu packages python) + #:use-module (gnu packages readline) + #:use-module (gnu packages slang)) + +(define-public dos2unix + (package + (name "dos2unix") + (version "7.4.0") + (source + (origin + (method url-fetch) + (uri (string-append "https://waterlan.home.xs4all.nl/" name "/" + name "-" version ".tar.gz")) + (sha256 + (base32 "12h4c61g376bhq03y5g2xszkrkrj5hwd928rly3xsp6rvfmnbixs")))) + (build-system gnu-build-system) + (arguments + '(#:make-flags (list "CC=gcc" + (string-append "prefix=" (assoc-ref %outputs "out"))) + #:phases + (modify-phases %standard-phases + (delete 'configure)))) ; no configure script + (native-inputs + `(("gettext" ,gettext-minimal) + ("perl" ,perl))) + (home-page "https://waterlan.home.xs4all.nl/dos2unix.html") + (synopsis "DOS/Mac to Unix and vice versa text file format converter") + (description + "dos2unix is a tool to convert line breaks in a text file from Unix format +to DOS format and vice versa.") + (license license:bsd-2))) (define-public recode (package @@ -48,15 +98,15 @@ (native-inputs `(("python" ,python-2))) (arguments '(#:phases - (alist-cons-before - 'check 'pre-check - (lambda _ - (substitute* "tests/setup.py" - (("([[:space:]]*)include_dirs=.*" all space) - (string-append all space "library_dirs=['../src/.libs'],\n"))) - ;; The test extension 'Recode.so' lacks RUNPATH for 'librecode.so'. - (setenv "LD_LIBRARY_PATH" (string-append (getcwd) "/src/.libs"))) - %standard-phases))) + (modify-phases %standard-phases + (add-before 'check 'pre-check + (lambda _ + (substitute* "tests/setup.py" + (("([[:space:]]*)include_dirs=.*" all space) + (string-append all space "library_dirs=['../src/.libs'],\n"))) + ;; The test extension 'Recode.so' lacks RUNPATH for 'librecode.so'. + (setenv "LD_LIBRARY_PATH" (string-append (getcwd) "/src/.libs")) + #t))))) (home-page "https://github.com/pinard/Recode") (synopsis "Text encoding converter") (description "The Recode library converts files between character sets and @@ -96,7 +146,7 @@ libenca and several charset conversion libraries and tools.") (define-public utf8proc (package (name "utf8proc") - (version "1.3.1") + (version "2.1.0") (source (origin (method url-fetch) @@ -105,20 +155,43 @@ libenca and several charset conversion libraries and tools.") version ".tar.gz")) (file-name (string-append name "-" version ".tar.gz")) (sha256 - (base32 "1k48as5kjkar4yj3dwxyll8ykj4k723ib5a6mnw1g86q3zi0zdl3")))) + (base32 "0q1jhdkk4f9b0zb8s2ql3sba3br5nvjsmbsaybmgj064k9hwbk15")))) (build-system gnu-build-system) + (inputs ;test data that is otherwise downloaded with curl + `(("NormalizationTest.txt" + ,(origin + (method url-fetch) + (uri (string-append "http://www.unicode.org/Public/9.0.0/ucd/" + "NormalizationTest.txt")) + (sha256 + (base32 "1fxrz0bilsbwl685336aqi88k62i6nqhm62rvy4zhg3bcm4dhj1d")))) + ("GraphemeBreakTest.txt" + ,(origin + (method url-fetch) + (uri (string-append "http://www.unicode.org/Public/9.0.0/ucd/" + "auxiliary/GraphemeBreakTest.txt")) + (sha256 + (base32 "0qbhyhmf0778lc2hcwlpizrvmdxwpk959v2q2wb8abv09ba7wvn7")))))) (arguments - '(#:tests? #f ;no "check" target - #:make-flags (list "CC=gcc" + '(#:make-flags (list "CC=gcc" (string-append "prefix=" (assoc-ref %outputs "out"))) #:phases (modify-phases %standard-phases - (delete 'configure)))) + (delete 'configure) + (add-before 'check 'check-data + (lambda* (#:key inputs #:allow-other-keys) + (for-each (lambda (i) + (copy-file (assoc-ref inputs i) + (string-append "data/" i))) + '("NormalizationTest.txt" "GraphemeBreakTest.txt")) + (substitute* "data/GraphemeBreakTest.txt" + (("÷") "/") + (("×") "+"))))))) (home-page "http://julialang.org/utf8proc/") (synopsis "C library for processing UTF-8 Unicode data") (description "utf8proc is a small C library that provides Unicode normalization, case-folding, and other operations for data in the UTF-8 -encoding, supporting Unicode version 7.0.") +encoding, supporting Unicode version 9.0.0.") (license license:expat))) (define-public libgtextutils @@ -136,10 +209,9 @@ encoding, supporting Unicode version 7.0.") (build-system gnu-build-system) (arguments '(#:phases - (alist-cons-after - 'unpack 'autoreconf - (lambda _ (zero? (system* "autoreconf" "-vif"))) - %standard-phases))) + (modify-phases %standard-phases + (add-after 'unpack 'autoreconf + (lambda _ (zero? (system* "autoreconf" "-vif"))))))) (native-inputs `(("autoconf" ,autoconf) ("automake" ,automake) @@ -195,7 +267,8 @@ input bits thoroughly but are not suitable for cryptography.") "/ustr-" version ".tar.bz2")) (sha256 (base32 - "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx")))) + "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx")) + (patches (search-patches "ustr-fix-build-with-gcc-5.patch")))) (build-system gnu-build-system) (arguments `(#:make-flags @@ -271,3 +344,319 @@ Consequently, the computation has a flat performance characteristic, correlated with data variation rather than file size. pfff can be as reliable as existing hashing techniques, with provably negligible risk of collisions.") (license license:bsd-3))) + +(define-public oniguruma + (package + (name "oniguruma") + (version "5.9.6") + (source (origin + (method url-fetch) + (uri (string-append "https://github.com/kkos/" + "oniguruma/releases/download/v" version + "/onig-" version ".tar.gz")) + (sha256 + (base32 + "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m")))) + (build-system gnu-build-system) + (home-page "https://github.com/kkos/oniguruma") + (synopsis "Regular expression library") + (description "Oniguruma is a regular expressions library. The special +characteristic of this library is that different character encoding for every +regular expression object can be specified.") + (license license:bsd-2))) + +(define-public antiword + (package + (name "antiword") + (version "0.37") + (source (origin + (method url-fetch) + (uri (string-append "http://www.winfield.demon.nl/linux" + "/antiword-" version ".tar.gz")) + (sha256 + (base32 + "1b7mi1l20jhj09kyh0bq14qzz8vdhhyf35gzwsq43mn6rc7h0b4f")) + (patches (search-patches "antiword-CVE-2014-8123.patch")))) + (build-system gnu-build-system) + (arguments + `(#:tests? #f ; There are no tests + #:make-flags + (list "-f" "Makefile.Linux" + (string-append "GLOBAL_INSTALL_DIR=" + (assoc-ref %outputs "out") "/bin") + (string-append "GLOBAL_RESOURCES_DIR=" + (assoc-ref %outputs "out") "/share/antiword")) + #:phases + (modify-phases %standard-phases + (delete 'configure) + (replace 'install + (lambda* (#:key make-flags #:allow-other-keys) + (zero? (apply system* "make" `("global_install" ,@make-flags)))))))) + (home-page "http://www.winfield.demon.nl/") + (synopsis "Microsoft Word document reader") + (description "Antiword is an application for displaying Microsoft Word +documents. It can also convert the document to PostScript or XML. Only +documents made by MS Word version 2 and version 6 or later are supported. The +name comes from: \"The antidote against people who send Microsoft Word files +to everybody, because they believe that everybody runs Windows and therefore +runs Word\".") + (license license:gpl2+))) + +(define-public catdoc + (package + (name "catdoc") + (version "0.95") + (source (origin + (method url-fetch) + (uri (string-append "http://ftp.wagner.pp.ru/pub/catdoc/" + "catdoc-" version ".tar.gz")) + (patches (search-patches "catdoc-CVE-2017-11110.patch")) + (sha256 + (base32 + "15h7v3bmwfk4z8r78xs5ih6vd0pskn0rj90xghvbzdjj0cc88jji")))) + (build-system gnu-build-system) + ;; TODO: Also build `wordview` which requires `tk` – make a separate + ;; package for this. + (arguments + '(#:tests? #f ; There are no tests + #:configure-flags '("--disable-wordview") + #:phases + (modify-phases %standard-phases + (add-before 'install 'fix-install + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (mkdir-p (string-append out "/share/man/man1")))))))) + (home-page "http://www.wagner.pp.ru/~vitus/software/catdoc/") + (synopsis "MS-Word to TeX or plain text converter") + (description "@command{catdoc} extracts text from MS-Word files, trying to +preserve as many special printable characters as possible. It supports +everything up to Word-97. Also supported are MS Write documents and RTF files. + +@command{catdoc} does not preserve complex word formatting, but it can +translate some non-ASCII characters into TeX escape codes. It's goal is to +extract plain text and allow you to read it and, probably, reformat with TeX, +according to TeXnical rules. + +This package also provides @command{xls2csv}, which extracts data from Excel +spreadsheets and outputs it in comma-separated-value format, and +@command{catppt}, which extracts data from PowerPoint presentations.") + (license license:gpl2+))) + +(define-public utfcpp + (package + (name "utfcpp") + (version "2.3.4") + (source (origin + (method url-fetch) + (uri + (string-append + "mirror://sourceforge/utfcpp/utf8cpp_2x/Release%20" + version "/utf8_v" + (string-map (lambda (x) (if (eq? x #\.) #\_ x)) version) + ".zip")) + (file-name (string-append name "-" version ".zip")) + (sha256 + (base32 + "1vqhs0aipcvvdrwcs7h3jsryg6mgbmc4s34n5cm6d36q4nxwwwrk")))) + (build-system trivial-build-system) + (arguments + `(#:modules ((guix build utils)) + #:builder + (begin + (use-modules (guix build utils)) + (let ((source (assoc-ref %build-inputs "source")) + (out (assoc-ref %outputs "out")) + (unzip (string-append (assoc-ref %build-inputs "unzip") + "/bin/unzip"))) + (mkdir-p out) + (with-directory-excursion out + (system* unzip source) + (mkdir-p "share/doc") + (rename-file "doc" "share/doc/utfcpp") + (rename-file "source" "include")))))) + (native-inputs `(("unzip" ,unzip))) + (home-page "https://github.com/nemtrif/utfcpp") + (synopsis "Portable C++ library for handling UTF-8") + (description "UTF8-CPP is a C++ library for handling UTF-8 encoded text +in a portable way.") + (license license:boost1.0))) + +(define-public dbacl + (package + (name "dbacl") + (version "1.14") + (source + (origin + (method url-fetch) + (uri (string-append "http://www.lbreyer.com/gpl/" + name "-" version ".tar.gz")) + (sha256 + (base32 + "0224g6x71hyvy7jikfxmgcwww1r5lvk0jx36cva319cb9nmrbrq7")))) + (build-system gnu-build-system) + (arguments + `(#:make-flags + (list + (string-append "-I" (assoc-ref %build-inputs "slang") + "/include/slang") + (string-append "-I" (assoc-ref %build-inputs "ncurses") + "/include/ncurses")) + #:phases + (modify-phases %standard-phases + (add-after 'unpack 'delete-sample6-and-japanese + (lambda _ + (substitute* "doc/Makefile.am" + (("sample6.txt") "") + (("japanese.txt") "")) + (delete-file "doc/sample6.txt") + (delete-file "doc/japanese.txt") + (substitute* (list "src/tests/Makefile.am" + "src/tests/Makefile.in") + (("dbacl-jap.shin") "") + (("dbacl-jap.sh") "")) + #t)) + (add-after 'unpack 'delete-test + ;; See comments about the license. + (lambda _ + (delete-file "src/tests/dbacl-jap.shin"))) + (add-after 'delete-sample6-and-japanese 'autoreconf + (lambda _ + (zero? (system* "autoreconf" "-vif")))) + (add-after 'unpack 'fix-test-files + (lambda* (#:key inputs outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (bin (string-append out "/bin"))) + (substitute* (find-files "src/tests/" "\\.shin$") + (("PATH=/bin:/usr/bin") + "#PATH=/bin:/usr/bin") + (("diff") (string-append (which "diff"))) + (("tr") (string-append (which "tr")))) + #t)))))) + (inputs + `(("ncurses" ,ncurses) + ("perl" ,perl) + ("readline" ,readline) + ("slang" ,slang))) + (native-inputs + `(("libtool" ,libtool) + ("autoconf" ,autoconf) + ("automake" ,automake) + ("pkg-config" ,pkg-config))) + (home-page "http://www.lbreyer.com/dbacl.html") + (synopsis "Bayesian text and email classifier") + (description + "dbacl is a fast Bayesian text and email classifier. It builds a variety +of language models using maximum entropy (minimum divergence) principles, and +these can then be used to categorize input data automatically among multiple +categories.") + ;; The software is licensed as GPLv3 or later, but + ;; includes various sample texts in the doc dir: + ;; - sample1.txt, sample3 and sampe5.txt are in the public domain, + ;; by Mark Twain. + ;; - sample2.txt, sample4.txt are in the public domain, by Aristotle. + ;; - sample6.txt is a forwarded email, copyright unknown. + ;; Guix does exclude sample6.txt. + ;; - japanese.txt is a Japanese unoffical translation of the + ;; GNU General Public License, (c) by the Free Software Foundation. + ;; Guix excludes this file. + (license (list license:gpl3+ license:public-domain)))) + +(define-public dotconf + (package + (name "dotconf") + (version "1.3") + (source (origin + (method url-fetch) + (uri (string-append + "https://github.com/williamh/dotconf/archive/v" + version ".tar.gz")) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "0lsnh0yaw44psmx59hq94cj1932gscp5h8d3cnh05l0svr0cy7kz")))) + (build-system gnu-build-system) + (arguments + `(#:tests? #f ; FIXME maketest.sh does not work. + #:phases + (modify-phases %standard-phases + (add-after 'unpack 'autoreconf + (lambda _ + (zero? (system* "autoreconf" "-vif"))))))) + (native-inputs + `(("autoconf" ,autoconf) + ("automake" ,automake) + ("libtool" ,libtool))) + (home-page "https://github.com/williamh/dotconf") + (synopsis "Configuration file parser library") + (description + "C library for creating and parsing configuration files.") + (license (list license:lgpl2.1 ; Main distribution. + license:asl1.1)))) ; src/readdir.{c,h} + +(define-public java-rsyntaxtextarea + (package + (name "java-rsyntaxtextarea") + (version "2.6.1") + (source (origin + (method url-fetch) + (uri (string-append "https://github.com/bobbylight/" + "RSyntaxTextArea/archive/" + version ".tar.gz")) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "0c5mqg2klj5rvf8fhycrli8rf6s37l9p7a8knw9gpp65r1c120q2")))) + (build-system ant-build-system) + (arguments + `(;; FIXME: some tests fail because locale resources cannot be found. + ;; Even when I add them to the class path, + ;; RSyntaxTextAreaEditorKitDumbCompleteWordActionTest fails. + #:tests? #f + #:jar-name "rsyntaxtextarea.jar")) + (native-inputs + `(("java-junit" ,java-junit) + ("java-hamcrest-core" ,java-hamcrest-core))) + (home-page "https://bobbylight.github.io/RSyntaxTextArea/") + (synopsis "Syntax highlighting text component for Java Swing") + (description "RSyntaxTextArea is a syntax highlighting, code folding text +component for Java Swing. It extends @code{JTextComponent} so it integrates +completely with the standard @code{javax.swing.text} package. It is fast and +efficient, and can be used in any application that needs to edit or view +source code.") + (license license:bsd-3))) + +;; We use the sources from git instead of the tarball from pypi, because the +;; latter does not include the Cython source file from which bycython.cpp is +;; generated. +(define-public python-editdistance + (let ((commit "3ea84a7dd3258c76aa3be851ef3d50e59c886846") + (revision "1")) + (package + (name "python-editdistance") + (version (string-append "0.3.1-" revision "." (string-take commit 7))) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/aflc/editdistance.git") + (commit commit))) + (sha256 + (base32 + "1l43svsv12crvzphrgi6x435z6xg8m086c64armp8wzb4l8ccm7g")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'build-cython-code + (lambda _ + (with-directory-excursion "editdistance" + (delete-file "bycython.cpp") + (zero? (system* "cython" "--cplus" "bycython.pyx")))))))) + (native-inputs + `(("python-cython" ,python-cython))) + (home-page "https://www.github.com/aflc/editdistance") + (synopsis "Fast implementation of the edit distance (Levenshtein distance)") + (description + "This library simply implements Levenshtein distance algorithm with C++ +and Cython.") + (license license:expat))))