;;; Copyright © 2016 Jelle Licht <jlicht@fsfe.org>
;;; Copyright © 2016 Alex Griffin <a@ajgrf.com>
;;; Copyright © 2016, 2018 Efraim Flashner <efraim@flashner.co.il>
-;;; Copyright © 2016 ng0 <ng0@we.make.ritual.n0.is>
+;;; Copyright © 2016 Nils Gillmann <ng0@n0.is>
;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
;;; Copyright © 2017 Eric Bavier <bavier@member.fsf.org>
;;; Copyright © 2017 Rene Saavedra <rennes@openmailbox.org>
;;; Copyright © 2017 Kei Kebreau <kkebreau@posteo.net>
;;; Copyright © 2017 Alex Vong <alexvong1995@gmail.com>
;;; Copyright © 2018 Tobias Geerinckx-Rice <me@tobias.gr>
+;;; Copyright © 2018 Pierre Neidhardt <ambrevar@gmail.com>
;;;
;;; This file is part of GNU Guix.
;;;
(base32
"0r4yhf7i7zp2nl2apyzz7r3i2in12n385hmr8zcfr18ly0ly530q"))
(modules '((guix build utils)))
- (snippet
- `(begin
- (delete-file "tests/Recode.c")
- #t))))
+ (snippet '(begin
+ (delete-file "tests/Recode.c")
+ #t))))
(build-system gnu-build-system)
(native-inputs
`(("python" ,python-2)
(define-public utf8proc
(package
(name "utf8proc")
- (version "2.1.0")
+ (version "2.1.1")
(source
(origin
(method url-fetch)
version ".tar.gz"))
(file-name (string-append name "-" version ".tar.gz"))
(sha256
- (base32 "0q1jhdkk4f9b0zb8s2ql3sba3br5nvjsmbsaybmgj064k9hwbk15"))))
+ (base32 "1cnpigrazhslw65s4j1a56j7p6d7d61wsxxjf1218i9mkwv2yw17"))))
(build-system gnu-build-system)
- (inputs ;test data that is otherwise downloaded with curl
+ (inputs ; test data that is otherwise downloaded with curl
`(("NormalizationTest.txt"
,(origin
(method url-fetch)
(substitute* "data/GraphemeBreakTest.txt"
(("÷") "/")
(("×") "+")))))))
- (home-page "http://julialang.org/utf8proc/")
+ (home-page "https://julialang.org/utf8proc/")
(synopsis "C library for processing UTF-8 Unicode data")
(description "utf8proc is a small C library that provides Unicode
normalization, case-folding, and other operations for data in the UTF-8
(define-public libconfig
(package
(name "libconfig")
- (version "1.5")
+ (version "1.7.2")
+ (home-page "https://hyperrealm.github.io/libconfig/")
(source (origin
(method url-fetch)
- (uri (string-append "http://www.hyperrealm.com/libconfig/"
- "libconfig-" version ".tar.gz"))
+ (uri (string-append home-page "/dist/libconfig-"
+ version ".tar.gz"))
(sha256
(base32
- "1xh3hzk63v4y8815lc5209m3s6ms2cpgw4h5hg462i4f1lwsl7g3"))))
+ "1ngs2qx3cx5cbwinc5mvadly0b5n7s86zsc68c404czzfff7lg3w"))))
(build-system gnu-build-system)
- (home-page "http://www.hyperrealm.com/libconfig/")
(synopsis "C/C++ configuration file library")
(description
"Libconfig is a simple library for manipulating structured configuration
(define-public oniguruma
(package
(name "oniguruma")
- (version "5.9.6")
+ (version "6.8.2")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/kkos/"
"/onig-" version ".tar.gz"))
(sha256
(base32
- "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m"))))
+ "00s9gjgb3srn5sbmx4x9bssn52mi04d868ghizssdhjlddgxmsmd"))))
(build-system gnu-build-system)
(home-page "https://github.com/kkos/oniguruma")
(synopsis "Regular expression library")
regular expression object can be specified.")
(license license:bsd-2)))
+;; Old 5.x release of Oniguruma.  PHP versions before 7.3.0 require it;
+;; remove this variant once it is no longer needed.
+(define-public oniguruma-5
+  (package
+    (inherit oniguruma)
+    ;; The download URI below interpolates this version string twice.
+    (version "5.9.6")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "https://github.com/kkos/oniguruma/releases/download/v"
+             version "/onig-" version ".tar.gz"))
+       (sha256
+        (base32 "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m"))))))
+
(define-public antiword
(package
(name "antiword")
(add-after 'unpack 'delete-test
;; See comments about the license.
(lambda _
- (delete-file "src/tests/dbacl-jap.shin")))
+ (delete-file "src/tests/dbacl-jap.shin")
+ #t))
(add-after 'delete-sample6-and-japanese 'autoreconf
(lambda _
- (zero? (system* "autoreconf" "-vif"))))
+ (invoke "autoreconf" "-vif")
+ #t))
(add-after 'unpack 'fix-test-files
(lambda* (#:key inputs outputs #:allow-other-keys)
(let* ((out (assoc-ref outputs "out"))
("autoconf" ,autoconf)
("automake" ,automake)
("pkg-config" ,pkg-config)))
- (home-page "http://www.lbreyer.com/dbacl.html")
+ (home-page "https://www.lbreyer.com/dbacl.html")
(synopsis "Bayesian text and email classifier")
(description
"dbacl is a fast Bayesian text and email classifier. It builds a variety
(uri (git-reference
(url "https://github.com/aflc/editdistance.git")
(commit commit)))
+ (file-name (git-file-name name version))
(sha256
(base32
"1l43svsv12crvzphrgi6x435z6xg8m086c64armp8wzb4l8ccm7g"))))
(lambda _
(with-directory-excursion "editdistance"
(delete-file "bycython.cpp")
- (zero? (system* "cython" "--cplus" "bycython.pyx"))))))))
+ (invoke "cython" "--cplus" "bycython.pyx")))))))
(native-inputs
`(("python-cython" ,python-cython)))
(home-page "https://www.github.com/aflc/editdistance")
measuring and checking the width of strings, with support east asian text.")
(home-page "https://github.com/jessevdk/go-flags")
(license license:expat)))
+
+;; docx2txt: Perl script that extracts text from .docx files.  The upstream
+;; tarball ships only a Makefile (no configure script, no test suite), so the
+;; recipe drives `make install' directly and then tidies up the result.
+(define-public docx2txt
+  (package
+    (name "docx2txt")
+    (version "1.4")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "mirror://sourceforge/docx2txt/docx2txt/v"
+                    version "/docx2txt-" version ".tgz"))
+              (sha256
+               (base32
+                "06vdikjvpj6qdb41d8wzfnyj44jpnknmlgbhbr1w215420lpb5xj"))))
+    (build-system gnu-build-system)
+    (inputs
+     `(("unzip" ,unzip)
+       ("perl" ,perl)))
+    (arguments
+     `(#:tests? #f                      ; No tests.
+       ;; Point the Makefile at the store output for both the script
+       ;; directory and the configuration directory.
+       #:make-flags (list (string-append "BINDIR="
+                                         (assoc-ref %outputs "out") "/bin")
+                          (string-append "CONFIGDIR="
+                                         (assoc-ref %outputs "out") "/etc")
+                          ;; Makefile seems to be a bit dumb at guessing.
+                          "INSTALL=install"
+                          "PERL=perl")
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure)            ; Nothing to configure.
+         ;; Post-process what `make install' put into the output: drop the
+         ;; shell wrapper, give the Perl script its final name, and make the
+         ;; installed configuration refer to the unzip input.
+         (add-after 'install 'fix-install
+           (lambda* (#:key outputs inputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin"))
+                    (config (string-append out "/etc/docx2txt.config"))
+                    (unzip (assoc-ref inputs "unzip")))
+               ;; According to INSTALL, the .sh wrapper can be skipped.
+               (delete-file (string-append bin "/docx2txt.sh"))
+               (rename-file (string-append bin "/docx2txt.pl")
+                            (string-append bin "/docx2txt"))
+               ;; Replace the hard-coded /usr/bin/unzip with the absolute
+               ;; path of the unzip input.
+               (substitute* config
+                 (("config_unzip => '/usr/bin/unzip',")
+                  (string-append "config_unzip => '"
+                                 unzip
+                                 "/bin/unzip',")))
+               ;; Makefile is wrong.
+               (chmod config #o644)
+               ;; Report success explicitly, like the other phases in this
+               ;; file, instead of returning whatever chmod returns.
+               #t))))))
+    (synopsis "Recover text from @file{.docx} files, with good formatting")
+    (description
+     "@command{docx2txt} is a Perl based command line utility to convert
+Microsoft Office @file{.docx} documents to equivalent text documents. Latest
+version supports following features during text extraction.
+
+@itemize
+@item Character conversions; currency characters are converted to respective
+names like Euro.
+@item Capitalisation of text blocks.
+@item Center and right justification of text fitting in a line of
+(configurable) 80 columns.
+@item Horizontal ruler, line breaks, paragraphs separation, tabs.
+@item Indicating hyperlinked text along with the hyperlink (configurable).
+@item Handling (bullet, decimal, letter, roman) lists along with (attempt at)
+indentation.
+@end itemize\n")
+    (home-page "http://docx2txt.sourceforge.net")
+    (license license:gpl3+)))