gnu: libical: Correct zoneinfo search path.
[jackhill/guix/guix.git] / gnu / packages / textutils.scm
CommitLineData
7939e70a
TUBK
1;;; GNU Guix --- Functional package management for GNU
2;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer <taylanbayirli@gmail.com>
c8d969b5 3;;; Copyright © 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
a64a8c46 4;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
e522d840 5;;; Copyright © 2015 Roel Janssen <roel@gnu.org>
2d8cf0b3 6;;; Copyright © 2016 Jelle Licht <jlicht@fsfe.org>
7939e70a
TUBK
7;;;
8;;; This file is part of GNU Guix.
9;;;
10;;; GNU Guix is free software; you can redistribute it and/or modify it
11;;; under the terms of the GNU General Public License as published by
12;;; the Free Software Foundation; either version 3 of the License, or (at
13;;; your option) any later version.
14;;;
15;;; GNU Guix is distributed in the hope that it will be useful, but
16;;; WITHOUT ANY WARRANTY; without even the implied warranty of
17;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;;; GNU General Public License for more details.
19;;;
20;;; You should have received a copy of the GNU General Public License
21;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
22
23(define-module (gnu packages textutils)
24 #:use-module ((guix licenses) #:prefix license:)
25 #:use-module (guix packages)
26 #:use-module (guix download)
27 #:use-module (guix git-download)
28 #:use-module (guix build-system gnu)
c8d969b5 29 #:use-module (guix build-system cmake)
f571e1c3 30 #:use-module (gnu packages autotools)
bae87762 31 #:use-module (gnu packages python))
7939e70a
TUBK
32
;; Package definition for Recode, built from a pinned upstream Git commit
;; with the GNU build system.  Python 2 is a native (build-time) input.
(define-public recode
  (package
    (name "recode")
    ;; Last beta release (3.7-beta2) is from 2008; last commit from Feb 2014.
    ;; So we use that commit instead.
    (version "3.7.0.201402")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pinard/Recode.git")
             (commit "2d7092a9999194fc0e9449717a8048c8d8e26c18")))
       (sha256
        (base32 "1wssv8z6g3ryrw33sksz4rjhlnhgvvdqszw1ggl4rcwks34n86zm"))
       (file-name (string-append name "-" version "-checkout"))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (arguments
     '(#:phases
       ;; Use 'modify-phases' like the other packages in this module rather
       ;; than the low-level 'alist-cons-before' helper.
       (modify-phases %standard-phases
         (add-before 'check 'pre-check
           (lambda _
             ;; Re-emit the matched 'include_dirs=' line of tests/setup.py
             ;; and append a 'library_dirs' entry, with the same leading
             ;; whitespace, pointing at the freshly built library.
             (substitute* "tests/setup.py"
               (("([[:space:]]*)include_dirs=.*" all space)
                (string-append all space "library_dirs=['../src/.libs'],\n")))
             ;; The test extension 'Recode.so' lacks RUNPATH for 'librecode.so'.
             (setenv "LD_LIBRARY_PATH" (string-append (getcwd) "/src/.libs"))
             ;; 'setenv' has no useful return value; report success explicitly.
             #t)))))
    (home-page "https://github.com/pinard/Recode")
    (synopsis "Text encoding converter")
    (description "The Recode library converts files between character sets and
usages. It recognises or produces over 200 different character sets (or about
300 if combined with an iconv library) and transliterates files between almost
any pair. When exact transliteration are not possible, it gets rid of
offending characters or falls back on approximations. The recode program is a
handy front-end to the library.")
    (license license:gpl2+)))
688fe865
TUBK
70
;; Package definition for Enca, fetched as a release tarball from GitHub and
;; built with the GNU build system; 'recode' is its only declared input.
(define-public enca
  (package
    (name "enca")
    (version "1.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/nijel/enca/archive/" version ".tar.gz"))
       ;; Store the download under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1xik00x0yvhswsw2isnclabhv536xk1s42cf5z54gfbpbhc7ni8l"))))
    (build-system gnu-build-system)

    ;; Both 'test-convert-64.sh' and 'test-convert-filter.sh' manipulate a
    ;; 'test.tmp' file, so they have to run in sequence.
    (arguments (list #:parallel-tests? #f))

    (inputs (list (list "recode" recode)))
    (synopsis "Text encoding detection tool")
    (description "Enca (Extremely Naive Charset Analyser) consists of libenca,
an encoding detection library, and enca, a command line frontend, integrating
libenca and several charset conversion libraries and tools.")
    (home-page "https://github.com/nijel/enca")
    (license license:gpl2)))
cd15ad82
RW
96
;; Package definition for utf8proc, built from a GitHub release tarball.
;; The 'configure' phase is removed and the build is driven by make flags.
(define-public utf8proc
  (package
    (name "utf8proc")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/JuliaLang/utf8proc/archive/v"
             version ".tar.gz"))
       (sha256
        (base32 "1k48as5kjkar4yj3dwxyll8ykj4k723ib5a6mnw1g86q3zi0zdl3"))
       (file-name (string-append name "-" version ".tar.gz"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure))
       ;; The compiler and the installation prefix are handed to 'make'
       ;; directly.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               (string-append "prefix=" out)))
       #:tests? #f))                    ;no "check" target
    (synopsis "C library for processing UTF-8 Unicode data")
    (description "utf8proc is a small C library that provides Unicode
normalization, case-folding, and other operations for data in the UTF-8
encoding, supporting Unicode version 7.0.")
    (home-page "http://julialang.org/utf8proc/")
    (license license:expat)))
f571e1c3
RW
124
;; Package definition for libgtextutils, built from a GitHub release tarball.
;; The build system files are regenerated with 'autoreconf' after unpacking,
;; which is why autoconf, automake and libtool are native inputs.
(define-public libgtextutils
  (package
    (name "libgtextutils")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/libgtextutils/releases/download/"
             version "/libgtextutils-" version ".tar.gz"))
       (sha256
        (base32 "0jiybkb2z58wa2msvllnphr4js2hvjvh988pavb3mzkgr6ihwbkr"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       ;; Use 'modify-phases' like the other packages in this module rather
       ;; than the low-level 'alist-cons-after' helper.
       (modify-phases %standard-phases
         (add-after 'unpack 'autoreconf
           ;; The phase succeeds only when 'autoreconf' exits with status 0.
           (lambda _ (zero? (system* "autoreconf" "-vif")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    (home-page "https://github.com/agordon/libgtextutils")
    (synopsis "Gordon's text utils library")
    (description
     "libgtextutils is a text utilities library used by the fastx toolkit from
the Hannon Lab.")
    (license license:agpl3+)))
aae2b445
BW
154
;; Package definition for CityHash, built from a pinned Git commit.  The
;; abbreviated commit ID is bound once and reused in the version string and
;; in the Git reference.
(define-public cityhash
  (let ((commit "8af9b8c"))
    (package
      (name "cityhash")
      (version (string-append "1.1-2." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/google/cityhash.git")
                      (commit commit)))
                ;; This source comes from 'git-fetch', not a tarball
                ;; download, so use the same "-checkout" suffix as the other
                ;; Git-based source in this module instead of ".tar.gz".
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0n6skf5dv8yfl1ckax8dqhvsbslkwc9158zf2ims0xqdvzsahbi6"))))
      (build-system gnu-build-system)
      (arguments
       '(#:make-flags (list "CXXFLAGS=-g -O3")
         #:phases
         (modify-phases %standard-phases
           ;; citycrc is not installed by default but is used by some
           ;; programs.
           (add-after 'install 'install-citycrc
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy the header into the "include" directory of the "out"
               ;; output.
               (let* ((out (assoc-ref outputs "out"))
                      (include (string-append out "/include")))
                 (install-file "src/citycrc.h" include))
               #t)))))
      (home-page "https://github.com/google/cityhash")
      (synopsis "C++ hash functions for strings")
      (description
       "CityHash provides hash functions for strings. The functions mix the
input bits thoroughly but are not suitable for cryptography.")
      (license license:expat))))
e522d840 188
9128db21
RW
;; Package definition for Ustr, built from the upstream tarball.  There is
;; no configure script, so the build is driven entirely by make flags.
(define-public ustr
  (package
    (name "ustr")
    (version "1.0.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.and.org/ustr/" version
                                  "/ustr-" version ".tar.bz2"))
              (sha256
               (base32
                "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; No configure script
         (delete 'configure)
         (add-after 'unpack 'disable-check-for-stdint
           (lambda _
             ;; Of course we have stdint.h, just not in /usr/include
             (substitute* '("Makefile"
                            "ustr-import.in")
               (("-f \"/usr/include/stdint.h\"") "-z \"\""))
             #t)))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               "HIDE="
               ;; Override "/sbin/ldconfig" with "echo" because we don't need
               ;; "ldconfig".
               "LDCONFIG=echo"
               (string-append "prefix=" out)
               "all-shared"))))
    (synopsis "String library with very low memory overhead")
    (description
     "Ustr is a string library for C with very low memory overhead.")
    (home-page "http://www.and.org/ustr/")
    ;; Quoted from the home page: "The License for the code is MIT, new-BSD,
    ;; LGPL, etc. ... if you need another license to help compatibility, just
    ;; ask for it. It's basically public domain, without all the legal
    ;; problems for everyone that trying to make something public domain
    ;; entails."
    (license license:public-domain)))
231
e522d840
RJ
;; Package definition for libconfig, built from the upstream tarball with
;; the GNU build system and no extra arguments.
(define-public libconfig
  (package
    (name "libconfig")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (sha256
        (base32 "1xh3hzk63v4y8815lc5209m3s6ms2cpgw4h5hg462i4f1lwsl7g3"))
       (uri (string-append "http://www.hyperrealm.com/libconfig/"
                           "libconfig-" version ".tar.gz"))))
    (build-system gnu-build-system)
    (synopsis "C/C++ configuration file library")
    (description
     "Libconfig is a simple library for manipulating structured configuration
files. This file format is more compact and more readable than XML. And
unlike XML, it is type-aware, so it is not necessary to do string parsing in
application code.")
    (home-page "http://www.hyperrealm.com/libconfig/")
    (license license:lgpl2.1+)))
c8d969b5
PP
252
;; Package definition for pfff, fetched as a GitHub archive tarball and
;; built with the CMake build system.
(define-public pfff
  (package
    (name "pfff")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (sha256
        (base32 "00m553aa277iarxj6dalmklyb64r7ias49bfwzbacsfg8h3kar8m"))
       (uri (string-append "https://github.com/pfff/pfff/archive/v"
                           version ".tar.gz"))
       ;; Store the download under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))))
    (build-system cmake-build-system)
    (synopsis "Probabilistic fast file fingerprinting tool")
    (description
     "pfff is a tool for calculating a compact digital fingerprint of a file
by sampling randomly from the file instead of reading it in full.
Consequently, the computation has a flat performance characteristic,
correlated with data variation rather than file size. pfff can be as reliable
as existing hashing techniques, with provably negligible risk of collisions.")
    (home-page "http://biit.cs.ut.ee/pfff/")
    (license license:bsd-3)))
2d8cf0b3
JL
275
;; Package definition for Oniguruma, built from a GitHub release tarball
;; with the GNU build system and no extra arguments.
(define-public oniguruma
  (package
    (name "oniguruma")
    (version "5.9.6")
    (source
     (origin
       (method url-fetch)
       (sha256
        (base32 "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m"))
       (uri (string-append "https://github.com/kkos/"
                           "oniguruma/releases/download/v" version
                           "/onig-" version ".tar.gz"))))
    (build-system gnu-build-system)
    (synopsis "Regular expression library")
    (description "Oniguruma is a regular expressions library. The special
characteristic of this library is that different character encoding for every
regular expression object can be specified.")
    (home-page "https://github.com/kkos/oniguruma")
    (license license:bsd-2)))
1b90e57e
RW
295
;; Package definition for Antiword, built from the upstream tarball.  There
;; is no configure phase; 'make' is pointed at "Makefile.Linux" and the
;; install phase is replaced by the "global_install" target.
(define-public antiword
  (package
    (name "antiword")
    (version "0.37")
    (source
     (origin
       (method url-fetch)
       (sha256
        (base32 "1b7mi1l20jhj09kyh0bq14qzz8vdhhyf35gzwsq43mn6rc7h0b4f"))
       (uri (string-append "http://www.winfield.demon.nl/linux"
                           "/antiword-" version ".tar.gz"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Run the "global_install" target with the same make flags as
           ;; the build; the phase succeeds when 'make' exits with status 0.
           (lambda* (#:key make-flags #:allow-other-keys)
             (zero? (apply system* "make" "global_install" make-flags)))))
       #:make-flags
       ;; Binaries go to "bin" and resources to "share/antiword" under the
       ;; "out" output.
       (let ((out (assoc-ref %outputs "out")))
         (list "-f" "Makefile.Linux"
               (string-append "GLOBAL_INSTALL_DIR="
                              out "/bin")
               (string-append "GLOBAL_RESOURCES_DIR="
                              out "/share/antiword")))
       #:tests? #f))                    ; There are no tests
    (synopsis "Microsoft Word document reader")
    (description "Antiword is an application for displaying Microsoft Word
documents. It can also convert the document to PostScript or XML. Only
documents made by MS Word version 2 and version 6 or later are supported. The
name comes from: \"The antidote against people who send Microsoft Word files
to everybody, because they believe that everybody runs Windows and therefore
runs Word\".")
    (home-page "http://www.winfield.demon.nl/")
    (license license:gpl2+)))