gnu: ustr: Fix build with GCC 5.
[jackhill/guix/guix.git] / gnu / packages / textutils.scm
CommitLineData
7939e70a
TUBK
1;;; GNU Guix --- Functional package management for GNU
2;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer <taylanbayirli@gmail.com>
c8d969b5 3;;; Copyright © 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
a64a8c46 4;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
e522d840 5;;; Copyright © 2015 Roel Janssen <roel@gnu.org>
2d8cf0b3 6;;; Copyright © 2016 Jelle Licht <jlicht@fsfe.org>
f17a5447 7;;; Copyright © 2016 Alex Griffin <a@ajgrf.com>
9116f126 8;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
8888fe82 9;;; Copyright © 2016 ng0 <ng0@we.make.ritual.n0.is>
0905048a 10;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
5353cea0 11;;; Copyright © 2017 Eric Bavier <bavier@member.fsf.org>
7fdca77e 12;;; Copyright © 2017 Rene Saavedra <rennes@openmailbox.org>
7939e70a
TUBK
13;;;
14;;; This file is part of GNU Guix.
15;;;
16;;; GNU Guix is free software; you can redistribute it and/or modify it
17;;; under the terms of the GNU General Public License as published by
18;;; the Free Software Foundation; either version 3 of the License, or (at
19;;; your option) any later version.
20;;;
21;;; GNU Guix is distributed in the hope that it will be useful, but
22;;; WITHOUT ANY WARRANTY; without even the implied warranty of
23;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24;;; GNU General Public License for more details.
25;;;
26;;; You should have received a copy of the GNU General Public License
27;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
28
29(define-module (gnu packages textutils)
30 #:use-module ((guix licenses) #:prefix license:)
31 #:use-module (guix packages)
32 #:use-module (guix download)
33 #:use-module (guix git-download)
34 #:use-module (guix build-system gnu)
c8d969b5 35 #:use-module (guix build-system cmake)
f17a5447 36 #:use-module (guix build-system trivial)
9116f126 37 #:use-module (gnu packages)
f571e1c3 38 #:use-module (gnu packages autotools)
8888fe82 39 #:use-module (gnu packages ncurses)
40 #:use-module (gnu packages perl)
41 #:use-module (gnu packages pkg-config)
f17a5447 42 #:use-module (gnu packages python)
8888fe82 43 #:use-module (gnu packages readline)
44 #:use-module (gnu packages slang)
f17a5447 45 #:use-module (gnu packages zip))
7939e70a
TUBK
46
(define-public recode
  (package
    (name "recode")
    ;; Last beta release (3.7-beta2) is from 2008; last commit from Feb 2014.
    ;; So we use that commit instead.
    (version "3.7.0.201402")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pinard/Recode.git")
             (commit "2d7092a9999194fc0e9449717a8048c8d8e26c18")))
       (sha256
        (base32 "1wssv8z6g3ryrw33sksz4rjhlnhgvvdqszw1ggl4rcwks34n86zm"))
       (file-name (string-append name "-" version "-checkout"))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'pre-check
           (lambda _
             ;; Append a 'library_dirs' entry after the 'include_dirs' line
             ;; of the test setup script so that it points at 'src/.libs'.
             (substitute* "tests/setup.py"
               (("([[:space:]]*)include_dirs=.*" all space)
                (string-append all space "library_dirs=['../src/.libs'],\n")))
             ;; The test extension 'Recode.so' lacks RUNPATH for 'librecode.so'.
             (setenv "LD_LIBRARY_PATH" (string-append (getcwd) "/src/.libs"))
             ;; Return a boolean explicitly instead of relying on the
             ;; unspecified value of 'setenv'.
             #t)))))
    (home-page "https://github.com/pinard/Recode")
    (synopsis "Text encoding converter")
    (description "The Recode library converts files between character sets and
usages. It recognises or produces over 200 different character sets (or about
300 if combined with an iconv library) and transliterates files between almost
any pair. When exact transliteration are not possible, it gets rid of
offending characters or falls back on approximations. The recode program is a
handy front-end to the library.")
    (license license:gpl2+)))
688fe865
TUBK
84
(define-public enca
  (package
    (name "enca")
    (version "1.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nijel/enca/archive/"
                           version ".tar.gz"))
       ;; Give the downloaded archive a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1xik00x0yvhswsw2isnclabhv536xk1s42cf5z54gfbpbhc7ni8l"))))
    (build-system gnu-build-system)
    ;; The tests 'test-convert-64.sh' and 'test-convert-filter.sh' both work
    ;; on the same 'test.tmp' file, hence the test suite must run serially.
    (arguments `(#:parallel-tests? #f))
    (inputs
     `(("recode" ,recode)))
    (home-page "https://github.com/nijel/enca")
    (synopsis "Text encoding detection tool")
    (description "Enca (Extremely Naive Charset Analyser) consists of libenca,
an encoding detection library, and enca, a command line frontend, integrating
libenca and several charset conversion libraries and tools.")
    (license license:gpl2)))
cd15ad82
RW
110
(define-public utf8proc
  (package
    (name "utf8proc")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/JuliaLang/utf8proc/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0q1jhdkk4f9b0zb8s2ql3sba3br5nvjsmbsaybmgj064k9hwbk15"))))
    (build-system gnu-build-system)
    (inputs                   ;test data that is otherwise downloaded with curl
     `(("NormalizationTest.txt"
        ,(origin
           (method url-fetch)
           (uri (string-append "http://www.unicode.org/Public/9.0.0/ucd/"
                               "NormalizationTest.txt"))
           (sha256
            (base32 "1fxrz0bilsbwl685336aqi88k62i6nqhm62rvy4zhg3bcm4dhj1d"))))
       ("GraphemeBreakTest.txt"
        ,(origin
           (method url-fetch)
           (uri (string-append "http://www.unicode.org/Public/9.0.0/ucd/"
                               "auxiliary/GraphemeBreakTest.txt"))
           (sha256
            (base32 "0qbhyhmf0778lc2hcwlpizrvmdxwpk959v2q2wb8abv09ba7wvn7"))))))
    (arguments
     '(#:make-flags (list "CC=gcc"
                          (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; The 'configure' phase is removed; the installation prefix is
         ;; passed through #:make-flags instead.
         (delete 'configure)
         (add-before 'check 'check-data
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Copy each test data input into the "data/" directory under
             ;; its own name.
             (for-each (lambda (i)
                         (copy-file (assoc-ref inputs i)
                                    (string-append "data/" i)))
                       '("NormalizationTest.txt" "GraphemeBreakTest.txt"))
             ;; Replace the "÷" and "×" markers in the grapheme break data
             ;; with "/" and "+".
             (substitute* "data/GraphemeBreakTest.txt"
               (("÷") "/")
               (("×") "+"))
             ;; Return a boolean explicitly rather than the unspecified
             ;; value of 'substitute*'.
             #t)))))
    (home-page "http://julialang.org/utf8proc/")
    (synopsis "C library for processing UTF-8 Unicode data")
    (description "utf8proc is a small C library that provides Unicode
normalization, case-folding, and other operations for data in the UTF-8
encoding, supporting Unicode version 9.0.0.")
    (license license:expat)))
f571e1c3
RW
161
(define-public libgtextutils
  (package
    (name "libgtextutils")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/libgtextutils/releases/download/"
             version "/libgtextutils-" version ".tar.gz"))
       (sha256
        (base32 "0jiybkb2z58wa2msvllnphr4js2hvjvh988pavb3mzkgr6ihwbkr"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Regenerate the build system right after unpacking; the phase
         ;; succeeds only when 'autoreconf' exits with status zero.
         (add-after 'unpack 'autoreconf
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    ;; Tools needed by the 'autoreconf' phase.
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    (home-page "https://github.com/agordon/libgtextutils")
    (synopsis "Gordon's text utils library")
    (description
     "libgtextutils is a text utilities library used by the fastx toolkit from
the Hannon Lab.")
    (license license:agpl3+)))
aae2b445
BW
191
(define-public cityhash
  ;; The version string embeds the (abbreviated) commit being packaged.
  (let ((commit "8af9b8c"))
    (package
      (name "cityhash")
      (version (string-append "1.1-2." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/google/cityhash.git")
                      (commit commit)))
                ;; A 'git-fetch' origin is a checkout, not a tarball, so
                ;; name it like the other git checkout in this file.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0n6skf5dv8yfl1ckax8dqhvsbslkwc9158zf2ims0xqdvzsahbi6"))))
      (build-system gnu-build-system)
      (arguments
       '(#:make-flags (list "CXXFLAGS=-g -O3")
         #:phases
         (modify-phases %standard-phases
           ;; citycrc is not installed by default but is used by some
           ;; programs.
           (add-after 'install 'install-citycrc
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (include (string-append out "/include")))
                 (install-file "src/citycrc.h" include))
               #t)))))
      (home-page "https://github.com/google/cityhash")
      (synopsis "C++ hash functions for strings")
      (description
       "CityHash provides hash functions for strings. The functions mix the
input bits thoroughly but are not suitable for cryptography.")
      (license license:expat))))
e522d840 225
9128db21
RW
(define-public ustr
  (package
    (name "ustr")
    (version "1.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.and.org/ustr/" version
                           "/ustr-" version ".tar.bz2"))
       (patches (search-patches "ustr-fix-build-with-gcc-5.patch"))
       (sha256
        (base32
         "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (list "CC=gcc"
             "HIDE="
             ;; "ldconfig" is not needed here, so "echo" stands in for
             ;; "/sbin/ldconfig".
             "LDCONFIG=echo"
             (string-append "prefix=" (assoc-ref %outputs "out"))
             "all-shared")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script to run.
         (delete 'configure)
         (add-after 'unpack 'disable-check-for-stdint
           (lambda _
             ;; stdint.h is available, only not under /usr/include, so the
             ;; file-existence test is replaced by an empty-string test.
             (substitute* (list "Makefile" "ustr-import.in")
               (("-f \"/usr/include/stdint.h\"") "-z \"\""))
             #t)))))
    (home-page "http://www.and.org/ustr/")
    (synopsis "String library with very low memory overhead")
    (description
     "Ustr is a string library for C with very low memory overhead.")
    ;; Quoted from the home page: "The License for the code is MIT, new-BSD,
    ;; LGPL, etc. ... if you need another license to help compatibility, just
    ;; ask for it.  It's basically public domain, without all the legal
    ;; problems for everyone that trying to make something public domain
    ;; entails."
    (license license:public-domain)))
269
e522d840
RJ
(define-public libconfig
  (package
    (name "libconfig")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are fetched from the project's home page.
       (uri (string-append "http://www.hyperrealm.com/libconfig/libconfig-"
                           version ".tar.gz"))
       (sha256
        (base32 "1xh3hzk63v4y8815lc5209m3s6ms2cpgw4h5hg462i4f1lwsl7g3"))))
    (build-system gnu-build-system)
    (home-page "http://www.hyperrealm.com/libconfig/")
    (synopsis "C/C++ configuration file library")
    (description
     "Libconfig is a simple library for manipulating structured configuration
files. This file format is more compact and more readable than XML. And
unlike XML, it is type-aware, so it is not necessary to do string parsing in
application code.")
    (license license:lgpl2.1+)))
c8d969b5
PP
290
(define-public pfff
  (package
    (name "pfff")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pfff/pfff/archive/v" version
                           ".tar.gz"))
       (sha256
        (base32 "00m553aa277iarxj6dalmklyb64r7ias49bfwzbacsfg8h3kar8m"))
       ;; Store the archive under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))))
    (build-system cmake-build-system)
    (home-page "http://biit.cs.ut.ee/pfff/")
    (synopsis "Probabilistic fast file fingerprinting tool")
    (description
     "pfff is a tool for calculating a compact digital fingerprint of a file
by sampling randomly from the file instead of reading it in full.
Consequently, the computation has a flat performance characteristic,
correlated with data variation rather than file size. pfff can be as reliable
as existing hashing techniques, with provably negligible risk of collisions.")
    (license license:bsd-3)))
2d8cf0b3
JL
313
(define-public oniguruma
  (package
    (name "oniguruma")
    (version "5.9.6")
    (source
     (origin
       (method url-fetch)
       ;; The release directory is named "v<version>" while the tarball
       ;; itself is named "onig-<version>".
       (uri (string-append
             "https://github.com/kkos/oniguruma/releases/download/v"
             version "/onig-" version ".tar.gz"))
       (sha256
        (base32 "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m"))))
    (build-system gnu-build-system)
    (home-page "https://github.com/kkos/oniguruma")
    (synopsis "Regular expression library")
    (description "Oniguruma is a regular expressions library. The special
characteristic of this library is that different character encoding for every
regular expression object can be specified.")
    (license license:bsd-2)))
1b90e57e
RW
333
(define-public antiword
  (package
    (name "antiword")
    (version "0.37")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.winfield.demon.nl/linux/antiword-"
                           version ".tar.gz"))
       (patches (search-patches "antiword-CVE-2014-8123.patch"))
       (sha256
        (base32 "1b7mi1l20jhj09kyh0bq14qzz8vdhhyf35gzwsq43mn6rc7h0b4f"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no test suite is provided
       ;; Build with the Linux makefile and point both installation
       ;; directories into the package output.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "-f" "Makefile.Linux"
               (string-append "GLOBAL_INSTALL_DIR=" out "/bin")
               (string-append "GLOBAL_RESOURCES_DIR=" out "/share/antiword")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install through the "global_install" make target, passing the
         ;; same make flags as above.
         (replace 'install
           (lambda* (#:key make-flags #:allow-other-keys)
             (zero? (apply system* "make" "global_install" make-flags)))))))
    (home-page "http://www.winfield.demon.nl/")
    (synopsis "Microsoft Word document reader")
    (description "Antiword is an application for displaying Microsoft Word
documents. It can also convert the document to PostScript or XML. Only
documents made by MS Word version 2 and version 6 or later are supported. The
name comes from: \"The antidote against people who send Microsoft Word files
to everybody, because they believe that everybody runs Windows and therefore
runs Word\".")
    (license license:gpl2+)))
f17a5447
AG
370
(define-public utfcpp
  (package
    (name "utfcpp")
    (version "2.3.4")
    (source (origin
              (method url-fetch)
              (uri
               (string-append
                "mirror://sourceforge/utfcpp/utf8cpp_2x/Release%20"
                version "/utf8_v"
                ;; The archive file name spells the version with underscores
                ;; in place of dots.
                (string-map (lambda (x) (if (char=? x #\.) #\_ x)) version)
                ".zip"))
              (file-name (string-append name "-" version ".zip"))
              (sha256
               (base32
                "1vqhs0aipcvvdrwcs7h3jsryg6mgbmc4s34n5cm6d36q4nxwwwrk"))))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((source (assoc-ref %build-inputs "source"))
               (out (assoc-ref %outputs "out"))
               (unzip (string-append (assoc-ref %build-inputs "unzip")
                                     "/bin/unzip")))
           (mkdir-p out)
           (with-directory-excursion out
             ;; Extract the archive directly into the output and fail
             ;; loudly if 'unzip' reports an error.
             (unless (zero? (system* unzip source))
               (error "failed to unpack source archive" source))
             ;; Move the extracted "doc" and "source" directories to
             ;; "share/doc/utfcpp" and "include" respectively.
             (mkdir-p "share/doc")
             (rename-file "doc" "share/doc/utfcpp")
             (rename-file "source" "include")
             #t)))))
    (native-inputs `(("unzip" ,unzip)))
    (home-page "https://github.com/nemtrif/utfcpp")
    (synopsis "Portable C++ library for handling UTF-8")
    (description "UTF8-CPP is a C++ library for handling UTF-8 encoded text
in a portable way.")
    (license license:boost1.0)))
8888fe82 409
(define-public dbacl
  (package
    (name "dbacl")
    (version "1.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.lbreyer.com/gpl/"
                           name "-" version ".tar.gz"))
       (sha256
        (base32
         "0224g6x71hyvy7jikfxmgcwww1r5lvk0jx36cva319cb9nmrbrq7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ;; NOTE(review): these "-I" strings are handed to 'make' itself, where
       ;; -I sets the search path for included makefiles -- confirm whether
       ;; a CPPFLAGS/CFLAGS assignment was intended instead.
       (list
        (string-append "-I" (assoc-ref %build-inputs "slang")
                       "/include/slang")
        (string-append "-I" (assoc-ref %build-inputs "ncurses")
                       "/include/ncurses"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'delete-sample6-and-japanese
           (lambda _
             ;; Drop the two excluded documents (see the license comment
             ;; below) and every reference to them and to the Japanese test.
             (substitute* "doc/Makefile.am"
               (("sample6.txt") "")
               (("japanese.txt") ""))
             (delete-file "doc/sample6.txt")
             (delete-file "doc/japanese.txt")
             (substitute* (list "src/tests/Makefile.am"
                                "src/tests/Makefile.in")
               (("dbacl-jap.shin") "")
               (("dbacl-jap.sh") ""))
             #t))
         (add-after 'unpack 'delete-test
           ;; See comments about the license.
           (lambda _
             (delete-file "src/tests/dbacl-jap.shin")
             #t))
         ;; The Makefile.am files were edited above, so regenerate the
         ;; build system.
         (add-after 'delete-sample6-and-japanese 'autoreconf
           (lambda _
             (zero? (system* "autoreconf" "-vif"))))
         (add-after 'unpack 'fix-test-files
           (lambda _
             ;; Comment out the hard-coded PATH in the test templates and
             ;; substitute the locations found by 'which' for "diff" and
             ;; "tr".
             ;; NOTE(review): the patterns are unanchored regexps, so "tr"
             ;; also matches inside longer words -- confirm this is intended.
             (substitute* (find-files "src/tests/" "\\.shin$")
               (("PATH=/bin:/usr/bin")
                "#PATH=/bin:/usr/bin")
               (("diff") (string-append (which "diff")))
               (("tr") (string-append (which "tr"))))
             #t)))))
    (inputs
     `(("ncurses" ,ncurses)
       ("perl" ,perl)
       ("readline" ,readline)
       ("slang" ,slang)))
    (native-inputs
     `(("libtool" ,libtool)
       ("autoconf" ,autoconf)
       ("automake" ,automake)
       ("pkg-config" ,pkg-config)))
    (home-page "http://www.lbreyer.com/dbacl.html")
    (synopsis "Bayesian text and email classifier")
    (description
     "dbacl is a fast Bayesian text and email classifier. It builds a variety
of language models using maximum entropy (minimum divergence) principles, and
these can then be used to categorize input data automatically among multiple
categories.")
    ;; The software is licensed as GPLv3 or later, but
    ;; includes various sample texts in the doc dir:
    ;; - sample1.txt, sample3 and sample5.txt are in the public domain,
    ;;   by Mark Twain.
    ;; - sample2.txt, sample4.txt are in the public domain, by Aristotle.
    ;; - sample6.txt is a forwarded email, copyright unknown.
    ;;   Guix does exclude sample6.txt.
    ;; - japanese.txt is a Japanese unofficial translation of the
    ;;   GNU General Public License, (c) by the Free Software Foundation.
    ;;   Guix excludes this file.
    (license (list license:gpl3+ license:public-domain))))
0905048a
MB
489
(define-public dotconf
  (package
    (name "dotconf")
    (version "1.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/williamh/dotconf/archive/v"
                           version ".tar.gz"))
       (sha256
        (base32 "0lsnh0yaw44psmx59hq94cj1932gscp5h8d3cnh05l0svr0cy7kz"))
       ;; Store the archive under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))))
    (build-system gnu-build-system)
    ;; Tools used by the 'autoreconf' phase below.
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    (arguments
     '(#:tests? #f                      ;FIXME maketest.sh does not work.
       #:phases
       (modify-phases %standard-phases
         ;; Generate the build system before 'configure' runs; the phase
         ;; succeeds only if 'autoreconf' exits with status zero.
         (add-before 'configure 'autoreconf
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    (home-page "https://github.com/williamh/dotconf")
    (synopsis "Configuration file parser library")
    (description
     "C library for creating and parsing configuration files.")
    (license (list license:lgpl2.1      ;main distribution
                   license:asl1.1))))   ;src/readdir.{c,h}