Commit | Line | Data |
---|---|---|
7939e70a TUBK |
1 | ;;; GNU Guix --- Functional package management for GNU |
2 | ;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer <taylanbayirli@gmail.com> | |
c8d969b5 | 3 | ;;; Copyright © 2015, 2016 Ricardo Wurmus <rekado@elephly.net> |
a64a8c46 | 4 | ;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com> |
e522d840 | 5 | ;;; Copyright © 2015 Roel Janssen <roel@gnu.org> |
2d8cf0b3 | 6 | ;;; Copyright © 2016 Jelle Licht <jlicht@fsfe.org> |
7939e70a TUBK |
7 | ;;; |
8 | ;;; This file is part of GNU Guix. | |
9 | ;;; | |
10 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
11 | ;;; under the terms of the GNU General Public License as published by | |
12 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
13 | ;;; your option) any later version. | |
14 | ;;; | |
15 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
16 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;;; GNU General Public License for more details. | |
19 | ;;; | |
20 | ;;; You should have received a copy of the GNU General Public License | |
21 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
22 | ||
23 | (define-module (gnu packages textutils) | |
24 | #:use-module ((guix licenses) #:prefix license:) | |
25 | #:use-module (guix packages) | |
26 | #:use-module (guix download) | |
27 | #:use-module (guix git-download) | |
28 | #:use-module (guix build-system gnu) | |
c8d969b5 | 29 | #:use-module (guix build-system cmake) |
f571e1c3 | 30 | #:use-module (gnu packages autotools) |
bae87762 | 31 | #:use-module (gnu packages python)) |
7939e70a TUBK |
32 | |
;; Recode: character set conversion library together with its 'recode'
;; command-line front-end.
(define-public recode
  (package
    (name "recode")
    ;; Last beta release (3.7-beta2) is from 2008; last commit from Feb 2014.
    ;; So we use that commit instead.
    (version "3.7.0.201402")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pinard/Recode.git")
             (commit "2d7092a9999194fc0e9449717a8048c8d8e26c18")))
       (sha256
        (base32 "1wssv8z6g3ryrw33sksz4rjhlnhgvvdqszw1ggl4rcwks34n86zm"))
       (file-name (string-append name "-" version "-checkout"))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'pre-check
           (lambda _
             ;; Append a 'library_dirs' entry pointing at the build tree's
             ;; 'src/.libs' right after the 'include_dirs' line of the test
             ;; extension's setup script.
             (substitute* "tests/setup.py"
               (("([[:space:]]*)include_dirs=.*" all space)
                (string-append all space "library_dirs=['../src/.libs'],\n")))
             ;; The test extension 'Recode.so' lacks RUNPATH for 'librecode.so'.
             (setenv "LD_LIBRARY_PATH" (string-append (getcwd) "/src/.libs"))
             ;; Report success explicitly rather than relying on the value
             ;; returned by 'setenv'.
             #t)))))
    (home-page "https://github.com/pinard/Recode")
    (synopsis "Text encoding converter")
    (description "The Recode library converts files between character sets and
usages. It recognises or produces over 200 different character sets (or about
300 if combined with an iconv library) and transliterates files between almost
any pair. When exact transliteration is not possible, it gets rid of
offending characters or falls back on approximations. The recode program is a
handy front-end to the library.")
    (license license:gpl2+)))
688fe865 TUBK |
70 | |
;; Enca: the libenca encoding detection library and its command-line
;; front-end.
(define-public enca
  (package
    (name "enca")
    (version "1.16")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/nijel/enca/archive/"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1xik00x0yvhswsw2isnclabhv536xk1s42cf5z54gfbpbhc7ni8l"))))
    (build-system gnu-build-system)
    (arguments
     ;; 'test-convert-64.sh' and 'test-convert-filter.sh' both manipulate the
     ;; same 'test.tmp' file, hence the tests must run sequentially.
     `(#:parallel-tests? #f))
    (inputs
     `(("recode" ,recode)))
    (synopsis "Text encoding detection tool")
    (description "Enca (Extremely Naive Charset Analyser) consists of libenca,
an encoding detection library, and enca, a command line frontend, integrating
libenca and several charset conversion libraries and tools.")
    (home-page "https://github.com/nijel/enca")
    (license license:gpl2)))
cd15ad82 RW |
96 | |
;; utf8proc: small C library for Unicode processing of UTF-8 data.
(define-public utf8proc
  (package
    (name "utf8proc")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/JuliaLang/utf8proc/archive/v"
                    version ".tar.gz"))
              (sha256
               (base32
                "1k48as5kjkar4yj3dwxyll8ykj4k723ib5a6mnw1g86q3zi0zdl3"))
              (file-name (string-append name "-" version ".tar.gz"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The 'configure' phase is not used for this package; the compiler
         ;; and installation prefix are passed as make flags instead.
         (delete 'configure))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               (string-append "prefix=" out)))
       #:tests? #f))                    ;no "check" target
    (synopsis "C library for processing UTF-8 Unicode data")
    (description "utf8proc is a small C library that provides Unicode
normalization, case-folding, and other operations for data in the UTF-8
encoding, supporting Unicode version 7.0.")
    (home-page "http://julialang.org/utf8proc/")
    (license license:expat)))
f571e1c3 RW |
124 | |
;; libgtextutils: text utilities library used by the fastx toolkit.
(define-public libgtextutils
  (package
    (name "libgtextutils")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/libgtextutils/releases/download/"
             version "/libgtextutils-" version ".tar.gz"))
       (sha256
        (base32 "0jiybkb2z58wa2msvllnphr4js2hvjvh988pavb3mzkgr6ihwbkr"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Regenerate the build system right after unpacking; the phase
         ;; succeeds only if 'autoreconf' exits with status zero.
         (add-after 'unpack 'autoreconf
           (lambda _
             (zero? (system* "autoreconf" "-vif")))))))
    ;; Tools required by the 'autoreconf' phase above.
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    (home-page "https://github.com/agordon/libgtextutils")
    (synopsis "Gordon's text utils library")
    (description
     "libgtextutils is a text utilities library used by the fastx toolkit from
the Hannon Lab.")
    (license license:agpl3+)))
aae2b445 BW |
154 | |
155 | (define-public cityhash | |
a64a8c46 | 156 | (let ((commit "8af9b8c")) |
aae2b445 BW |
157 | (package |
158 | (name "cityhash") | |
a64a8c46 | 159 | (version (string-append "1.1-2." commit)) |
aae2b445 BW |
160 | (source (origin |
161 | (method git-fetch) | |
162 | (uri (git-reference | |
163 | (url "https://github.com/google/cityhash.git") | |
164 | (commit commit))) | |
165 | (file-name (string-append name "-" version ".tar.gz")) | |
166 | (sha256 | |
167 | (base32 | |
168 | "0n6skf5dv8yfl1ckax8dqhvsbslkwc9158zf2ims0xqdvzsahbi6")))) | |
a64a8c46 BW |
169 | (build-system gnu-build-system) |
170 | (arguments | |
171 | '(#:make-flags (list "CXXFLAGS=-g -O3") | |
172 | #:phases | |
173 | (modify-phases %standard-phases | |
174 | ;; citycrc is not installed by default but is used by some | |
175 | ;; programs. | |
176 | (add-after 'install 'install-citycrc | |
177 | (lambda* (#:key outputs #:allow-other-keys) | |
178 | (let* ((out (assoc-ref outputs "out")) | |
179 | (include (string-append out "/include"))) | |
180 | (install-file "src/citycrc.h" include)) | |
181 | #t))))) | |
182 | (home-page "https://github.com/google/cityhash") | |
183 | (synopsis "C++ hash functions for strings") | |
184 | (description | |
185 | "CityHash provides hash functions for strings. The functions mix the | |
aae2b445 | 186 | input bits thoroughly but are not suitable for cryptography.") |
a64a8c46 | 187 | (license license:expat)))) |
e522d840 | 188 | |
9128db21 RW |
;; Ustr: C string library with very low memory overhead.
(define-public ustr
  (package
    (name "ustr")
    (version "1.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.and.org/ustr/" version
                           "/ustr-" version ".tar.bz2"))
       (sha256
        (base32 "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; No configure script
         (delete 'configure)
         (add-after 'unpack 'disable-check-for-stdint
           (lambda _
             ;; Of course we have stdint.h, just not in /usr/include
             (substitute* (list "Makefile" "ustr-import.in")
               (("-f \"/usr/include/stdint.h\"") "-z \"\""))
             #t)))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               "HIDE="
               ;; "ldconfig" is not needed, so replace "/sbin/ldconfig" with
               ;; a harmless "echo".
               "LDCONFIG=echo"
               (string-append "prefix=" out)
               "all-shared"))))
    (synopsis "String library with very low memory overhead")
    (description
     "Ustr is a string library for C with very low memory overhead.")
    (home-page "http://www.and.org/ustr/")
    ;; Quoted from the home page: "The License for the code is MIT, new-BSD,
    ;; LGPL, etc. ... if you need another license to help compatibility, just
    ;; ask for it.  It's basically public domain, without all the legal
    ;; problems for everyone that trying to make something public domain
    ;; entails."
    (license license:public-domain)))
231 | ||
e522d840 RJ |
;; Libconfig: C/C++ library for reading structured configuration files.
(define-public libconfig
  (package
    (name "libconfig")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.hyperrealm.com/libconfig/"
                           "libconfig-" version ".tar.gz"))
       (sha256
        (base32 "1xh3hzk63v4y8815lc5209m3s6ms2cpgw4h5hg462i4f1lwsl7g3"))))
    ;; Standard configure/make/install build; no custom arguments are given.
    (build-system gnu-build-system)
    (synopsis "C/C++ configuration file library")
    (description
     "Libconfig is a simple library for manipulating structured configuration
files. This file format is more compact and more readable than XML. And
unlike XML, it is type-aware, so it is not necessary to do string parsing in
application code.")
    (home-page "http://www.hyperrealm.com/libconfig/")
    (license license:lgpl2.1+)))
c8d969b5 PP |
252 | |
;; pfff: probabilistic fast file fingerprinting tool, built with CMake.
(define-public pfff
  (package
    (name "pfff")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pfff/pfff/archive/v"
                           version ".tar.gz"))
       (sha256
        (base32 "00m553aa277iarxj6dalmklyb64r7ias49bfwzbacsfg8h3kar8m"))
       ;; The upstream archive is named after the tag only; give the
       ;; download a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))))
    (build-system cmake-build-system)
    (synopsis "Probabilistic fast file fingerprinting tool")
    (description
     "pfff is a tool for calculating a compact digital fingerprint of a file
by sampling randomly from the file instead of reading it in full.
Consequently, the computation has a flat performance characteristic,
correlated with data variation rather than file size. pfff can be as reliable
as existing hashing techniques, with provably negligible risk of collisions.")
    (home-page "http://biit.cs.ut.ee/pfff/")
    (license license:bsd-3)))
2d8cf0b3 JL |
275 | |
;; Oniguruma: regular expression library with per-regex character encodings.
(define-public oniguruma
  (package
    (name "oniguruma")
    (version "5.9.6")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are named "onig-VERSION" and live under the
       ;; "vVERSION" release tag.
       (uri (string-append "https://github.com/kkos/"
                           "oniguruma/releases/download/v" version
                           "/onig-" version ".tar.gz"))
       (sha256
        (base32 "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m"))))
    (build-system gnu-build-system)
    (synopsis "Regular expression library")
    (description "Oniguruma is a regular expressions library. The special
characteristic of this library is that different character encoding for every
regular expression object can be specified.")
    (home-page "https://github.com/kkos/oniguruma")
    (license license:bsd-2)))
1b90e57e RW |
295 | |
;; Antiword: reader and converter for Microsoft Word documents.
(define-public antiword
  (package
    (name "antiword")
    (version "0.37")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.winfield.demon.nl/linux"
                           "/antiword-" version ".tar.gz"))
       (sha256
        (base32 "1b7mi1l20jhj09kyh0bq14qzz8vdhhyf35gzwsq43mn6rc7h0b4f"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the "global_install" make target with the same flags as the
         ;; build, so the makefile selection and installation directories
         ;; below apply to it as well.
         (replace 'install
           (lambda* (#:key make-flags #:allow-other-keys)
             (zero? (apply system* "make" "global_install" make-flags)))))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "-f" "Makefile.Linux"
               (string-append "GLOBAL_INSTALL_DIR=" out "/bin")
               (string-append "GLOBAL_RESOURCES_DIR=" out "/share/antiword")))
       #:tests? #f))                    ; There are no tests
    (synopsis "Microsoft Word document reader")
    (description "Antiword is an application for displaying Microsoft Word
documents. It can also convert the document to PostScript or XML. Only
documents made by MS Word version 2 and version 6 or later are supported. The
name comes from: \"The antidote against people who send Microsoft Word files
to everybody, because they believe that everybody runs Windows and therefore
runs Word\".")
    (home-page "http://www.winfield.demon.nl/")
    (license license:gpl2+)))