Commit | Line | Data |
---|---|---|
7939e70a TUBK |
1 | ;;; GNU Guix --- Functional package management for GNU |
2 | ;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer <taylanbayirli@gmail.com> | |
ff3f6766 | 3 | ;;; Copyright © 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net> |
a64a8c46 | 4 | ;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com> |
e522d840 | 5 | ;;; Copyright © 2015 Roel Janssen <roel@gnu.org> |
2d8cf0b3 | 6 | ;;; Copyright © 2016 Jelle Licht <jlicht@fsfe.org> |
f17a5447 | 7 | ;;; Copyright © 2016 Alex Griffin <a@ajgrf.com> |
20a792b5 | 8 | ;;; Copyright © 2016, 2018 Efraim Flashner <efraim@flashner.co.il> |
4a78fd46 | 9 | ;;; Copyright © 2016 Nils Gillmann <ng0@n0.is> |
0905048a | 10 | ;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com> |
5353cea0 | 11 | ;;; Copyright © 2017 Eric Bavier <bavier@member.fsf.org> |
7fdca77e | 12 | ;;; Copyright © 2017 Rene Saavedra <rennes@openmailbox.org> |
d0abaf89 | 13 | ;;; Copyright © 2017 Hartmut Goebel <h.goebel@crazy-compilers.com> |
3c8ba11a | 14 | ;;; Copyright © 2017 Kei Kebreau <kkebreau@posteo.net> |
bfcdf887 | 15 | ;;; Copyright © 2017 Alex Vong <alexvong1995@gmail.com> |
f9c84cd4 | 16 | ;;; Copyright © 2018 Tobias Geerinckx-Rice <me@tobias.gr> |
4715f92e | 17 | ;;; Copyright © 2018 Pierre Neidhardt <mail@ambrevar.xyz> |
7939e70a TUBK |
18 | ;;; |
19 | ;;; This file is part of GNU Guix. | |
20 | ;;; | |
21 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
22 | ;;; under the terms of the GNU General Public License as published by | |
23 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
24 | ;;; your option) any later version. | |
25 | ;;; | |
26 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
27 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
28 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
29 | ;;; GNU General Public License for more details. | |
30 | ;;; | |
31 | ;;; You should have received a copy of the GNU General Public License | |
32 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
33 | ||
34 | (define-module (gnu packages textutils) | |
35 | #:use-module ((guix licenses) #:prefix license:) | |
36 | #:use-module (guix packages) | |
37 | #:use-module (guix download) | |
38 | #:use-module (guix git-download) | |
ff3f6766 | 39 | #:use-module (guix build-system ant) |
7939e70a | 40 | #:use-module (guix build-system gnu) |
03f801aa | 41 | #:use-module (guix build-system go) |
c8d969b5 | 42 | #:use-module (guix build-system cmake) |
03639d03 | 43 | #:use-module (guix build-system python) |
9116f126 | 44 | #:use-module (gnu packages) |
f571e1c3 | 45 | #:use-module (gnu packages autotools) |
148585c2 | 46 | #:use-module (gnu packages compression) |
2f6e988d | 47 | #:use-module (gnu packages gettext) |
ff3f6766 | 48 | #:use-module (gnu packages java) |
8888fe82 | 49 | #:use-module (gnu packages ncurses) |
50 | #:use-module (gnu packages perl) | |
51 | #:use-module (gnu packages pkg-config) | |
f17a5447 | 52 | #:use-module (gnu packages python) |
8888fe82 | 53 | #:use-module (gnu packages readline) |
148585c2 | 54 | #:use-module (gnu packages slang)) |
7939e70a | 55 | |
2f6e988d KK |
(define-public dos2unix
  (package
    (name "dos2unix")
    (version "7.4.0")
    (home-page "https://waterlan.home.xs4all.nl/dos2unix.html")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are served from a directory named after the
       ;; package itself.
       (uri (string-append "https://waterlan.home.xs4all.nl/"
                           name "/" name "-" version ".tar.gz"))
       (sha256
        (base32 "12h4c61g376bhq03y5g2xszkrkrj5hwd928rly3xsp6rvfmnbixs"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("gettext" ,gettext-minimal)
       ("perl" ,perl)))
    (arguments
     ;; Upstream ships no configure script, so that phase is dropped and the
     ;; compiler and installation prefix are handed to make directly.
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure))
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))))
    (synopsis "DOS/Mac to Unix and vice versa text file format converter")
    (description
     "dos2unix is a tool to convert line breaks in a text file from Unix format
to DOS format and vice versa.")
    (license license:bsd-2)))
83 | ||
7939e70a TUBK |
(define-public recode
  (package
    (name "recode")
    (version "3.7")
    (home-page "https://github.com/rrthomas/recode")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/rrthomas/recode/releases/download/v"
             version "/" name "-" version ".tar.gz"))
       (modules '((guix build utils)))
       ;; Drop the bundled tests/Recode.c before building.
       ;; NOTE(review): presumably a pre-generated Cython output that gets
       ;; regenerated with the python2-cython native input -- confirm.
       (snippet '(begin
                   (delete-file "tests/Recode.c")
                   #t))
       (sha256
        (base32
         "0r4yhf7i7zp2nl2apyzz7r3i2in12n385hmr8zcfr18ly0ly530q"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-2)
       ("python2-cython" ,python2-cython)))
    (synopsis "Text encoding converter")
    (description "The Recode library converts files between character sets and
usages. It recognises or produces over 200 different character sets (or about
300 if combined with an iconv library) and transliterates files between almost
any pair. When exact transliteration are not possible, it gets rid of
offending characters or falls back on approximations. The recode program is a
handy front-end to the library.")
    (license license:gpl3+)))
688fe865 TUBK |
113 | |
(define-public enca
  (package
    (name "enca")
    (version "1.19")
    (home-page "https://github.com/nijel/enca")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nijel/enca/archive/"
                           version ".tar.gz"))
       ;; The archive URL only carries the version, so give the download a
       ;; name that also mentions the package.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "099z526i7qgij7q1w3lvhl88iv3jc3nqxca2i09h6s08ghyrmzf4"))))
    (build-system gnu-build-system)
    ;; enca-1.19 tests fail with recent recode.
    ;(inputs `(("recode" ,recode)))
    (synopsis "Text encoding detection tool")
    (description "Enca (Extremely Naive Charset Analyser) consists of libenca,
an encoding detection library, and enca, a command line frontend, integrating
libenca and several charset conversion libraries and tools.")
    (license license:gpl2)))
cd15ad82 RW |
135 | |
(define-public utf8proc
  (package
    (name "utf8proc")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/JuliaLang/utf8proc/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1cnpigrazhslw65s4j1a56j7p6d7d61wsxxjf1218i9mkwv2yw17"))))
    (build-system gnu-build-system)
    (inputs                 ; test data that is otherwise downloaded with curl
     `(("NormalizationTest.txt"
        ,(origin
           (method url-fetch)
           (uri (string-append "http://www.unicode.org/Public/9.0.0/ucd/"
                               "NormalizationTest.txt"))
           (sha256
            (base32 "1fxrz0bilsbwl685336aqi88k62i6nqhm62rvy4zhg3bcm4dhj1d"))))
       ("GraphemeBreakTest.txt"
        ,(origin
           (method url-fetch)
           (uri (string-append "http://www.unicode.org/Public/9.0.0/ucd/"
                               "auxiliary/GraphemeBreakTest.txt"))
           (sha256
            (base32 "0qbhyhmf0778lc2hcwlpizrvmdxwpk959v2q2wb8abv09ba7wvn7"))))))
    (arguments
     '(#:make-flags (list "CC=gcc"
                          (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure step; everything is passed via make flags.
         (delete 'configure)
         (add-before 'check 'check-data
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Put the pre-fetched Unicode test files where the test suite
             ;; looks for them, under "data/".
             (for-each (lambda (i)
                         (copy-file (assoc-ref inputs i)
                                    (string-append "data/" i)))
                       '("NormalizationTest.txt" "GraphemeBreakTest.txt"))
             ;; Replace the "÷" and "×" markers with ASCII "/" and "+".
             (substitute* "data/GraphemeBreakTest.txt"
               (("÷") "/")
               (("×") "+"))
             ;; Return #t explicitly rather than the unspecified value of
             ;; 'substitute*'.
             #t)))))
    (home-page "https://julialang.org/utf8proc/")
    (synopsis "C library for processing UTF-8 Unicode data")
    (description "utf8proc is a small C library that provides Unicode
normalization, case-folding, and other operations for data in the UTF-8
encoding, supporting Unicode version 9.0.0.")
    (license license:expat)))
f571e1c3 RW |
186 | |
(define-public libgtextutils
  (package
    (name "libgtextutils")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/libgtextutils/releases/download/"
             version "/libgtextutils-" version ".tar.gz"))
       (sha256
        (base32 "0jiybkb2z58wa2msvllnphr4js2hvjvh988pavb3mzkgr6ihwbkr"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Regenerate the build system right after unpacking.  'invoke'
         ;; raises an error when autoreconf fails, so the phase does not
         ;; rely on a boolean exit-status check.
         (add-after 'unpack 'autoreconf
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    (home-page "https://github.com/agordon/libgtextutils")
    (synopsis "Gordon's text utils library")
    (description
     "libgtextutils is a text utilities library used by the fastx toolkit from
the Hannon Lab.")
    (license license:agpl3+)))
aae2b445 BW |
215 | |
(define-public cityhash
  (let ((commit "8af9b8c"))
    (package
      (name "cityhash")
      (version (string-append "1.1-2." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/google/cityhash.git")
                      (commit commit)))
                ;; A git checkout is a directory, not a tarball, so name it
                ;; with 'git-file-name' like the other git origins in this
                ;; file instead of giving it a ".tar.gz" suffix.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0n6skf5dv8yfl1ckax8dqhvsbslkwc9158zf2ims0xqdvzsahbi6"))))
      (build-system gnu-build-system)
      (arguments
       '(#:make-flags (list "CXXFLAGS=-g -O3")
         #:phases
         (modify-phases %standard-phases
           ;; citycrc is not installed by default but is used by some
           ;; programs.
           (add-after 'install 'install-citycrc
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (include (string-append out "/include")))
                 (install-file "src/citycrc.h" include))
               #t)))))
      (home-page "https://github.com/google/cityhash")
      (synopsis "C++ hash functions for strings")
      (description
       "CityHash provides hash functions for strings. The functions mix the
input bits thoroughly but are not suitable for cryptography.")
      (license license:expat))))
e522d840 | 249 | |
9128db21 RW |
(define-public ustr
  (package
    (name "ustr")
    (version "1.0.4")
    (home-page "http://www.and.org/ustr/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.and.org/ustr/" version "/ustr-"
                           version ".tar.bz2"))
       (patches (search-patches "ustr-fix-build-with-gcc-5.patch"))
       (sha256
        (base32
         "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The package is driven by a plain Makefile; there is nothing to
         ;; configure.
         (delete 'configure)
         (add-after 'unpack 'disable-check-for-stdint
           (lambda _
             ;; stdint.h is available, only not under /usr/include, so turn
             ;; the file-existence test into one that always succeeds.
             (substitute* (list "Makefile" "ustr-import.in")
               (("-f \"/usr/include/stdint.h\"") "-z \"\""))
             #t)))
       #:make-flags
       (list "CC=gcc"
             "HIDE="
             ;; "ldconfig" is not needed here; substitute "echo" for
             ;; "/sbin/ldconfig".
             "LDCONFIG=echo"
             (string-append "prefix=" (assoc-ref %outputs "out"))
             "all-shared")))
    (synopsis "String library with very low memory overhead")
    (description
     "Ustr is a string library for C with very low memory overhead.")
    ;; Quoted from the home page: "The License for the code is MIT, new-BSD,
    ;; LGPL, etc. ... if you need another license to help compatibility, just
    ;; ask for it.  It's basically public domain, without all the legal
    ;; problems for everyone that trying to make something public domain
    ;; entails."
    (license license:public-domain)))
293 | ||
e522d840 RJ |
(define-public libconfig
  (package
    (name "libconfig")
    (version "1.7.2")
    ;; 'home-page' is declared before 'source' because the download URI below
    ;; is derived from it.
    (home-page "https://hyperrealm.github.io/libconfig/")
    (source (origin
              (method url-fetch)
              ;; HOME-PAGE already ends with a slash; do not add another one,
              ;; which would yield ".../libconfig//dist/...".
              (uri (string-append home-page "dist/libconfig-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1ngs2qx3cx5cbwinc5mvadly0b5n7s86zsc68c404czzfff7lg3w"))))
    (build-system gnu-build-system)
    (synopsis "C/C++ configuration file library")
    (description
     "Libconfig is a simple library for manipulating structured configuration
files. This file format is more compact and more readable than XML. And
unlike XML, it is type-aware, so it is not necessary to do string parsing in
application code.")
    (license license:lgpl2.1+)))
c8d969b5 PP |
314 | |
(define-public pfff
  (package
    (name "pfff")
    (version "1.0")
    (home-page "http://biit.cs.ut.ee/pfff/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pfff/pfff/archive/v" version
                           ".tar.gz"))
       ;; The archive URL only carries the version; store the download under
       ;; a name that includes the package name as well.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "00m553aa277iarxj6dalmklyb64r7ias49bfwzbacsfg8h3kar8m"))))
    (build-system cmake-build-system)
    (synopsis "Probabilistic fast file fingerprinting tool")
    (description
     "pfff is a tool for calculating a compact digital fingerprint of a file
by sampling randomly from the file instead of reading it in full.
Consequently, the computation has a flat performance characteristic,
correlated with data variation rather than file size. pfff can be as reliable
as existing hashing techniques, with provably negligible risk of collisions.")
    (license license:bsd-3)))
2d8cf0b3 JL |
337 | |
(define-public oniguruma
  (package
    (name "oniguruma")
    (version "6.8.2")
    (home-page "https://github.com/kkos/oniguruma")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are published as "onig-VERSION.tar.gz".
       (uri (string-append
             "https://github.com/kkos/oniguruma/releases/download/v"
             version "/onig-" version ".tar.gz"))
       (sha256
        (base32
         "00s9gjgb3srn5sbmx4x9bssn52mi04d868ghizssdhjlddgxmsmd"))))
    (build-system gnu-build-system)
    (synopsis "Regular expression library")
    (description "Oniguruma is a regular expressions library. The special
characteristic of this library is that different character encoding for every
regular expression object can be specified.")
    (license license:bsd-2)))
1b90e57e | 357 | |
d5019f9c MB |
358 | ;; PHP < 7.3.0 requires this old version. Remove once no longer needed. |
(define-public oniguruma-5
  (package
    ;; Everything except the version and the source tarball is taken over
    ;; from the 'oniguruma' package.
    (inherit oniguruma)
    (version "5.9.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/kkos/oniguruma/releases/download/v"
             version "/onig-" version ".tar.gz"))
       (sha256
        (base32
         "19s79vsclqn170mw0ajwv7j37qsbn4f1yjz3yavnhvva6c820r6m"))))))
371 | ||
1b90e57e RW |
(define-public antiword
  (package
    (name "antiword")
    (version "0.37")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.winfield.demon.nl/linux"
                                  "/antiword-" version ".tar.gz"))
              (sha256
               (base32
                "1b7mi1l20jhj09kyh0bq14qzz8vdhhyf35gzwsq43mn6rc7h0b4f"))
              (patches (search-patches "antiword-CVE-2014-8123.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests
       #:make-flags
       (list "-f" "Makefile.Linux"
             (string-append "GLOBAL_INSTALL_DIR="
                            (assoc-ref %outputs "out") "/bin")
             (string-append "GLOBAL_RESOURCES_DIR="
                            (assoc-ref %outputs "out") "/share/antiword"))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Ensure that mapping files can be found in the actual package
             ;; data directory.
             (substitute* "antiword.h"
               (("/usr/share/antiword")
                (string-append (assoc-ref outputs "out") "/share/antiword")))
             #t))
         (replace 'install
           (lambda* (#:key make-flags #:allow-other-keys)
             ;; Run the "global_install" target with the same flags as the
             ;; build.  'invoke' raises an error if make fails, and passing
             ;; the arguments directly avoids a nested quasiquote.
             (apply invoke "make" "global_install" make-flags)
             #t)))))
    (home-page "http://www.winfield.demon.nl/")
    (synopsis "Microsoft Word document reader")
    (description "Antiword is an application for displaying Microsoft Word
documents. It can also convert the document to PostScript or XML. Only
documents made by MS Word version 2 and version 6 or later are supported. The
name comes from: \"The antidote against people who send Microsoft Word files
to everybody, because they believe that everybody runs Windows and therefore
runs Word\".")
    (license license:gpl2+)))
f17a5447 | 415 | |
d0abaf89 HG |
(define-public catdoc
  (package
    (name "catdoc")
    (version "0.95")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ftp.wagner.pp.ru/pub/catdoc/"
                                  "catdoc-" version ".tar.gz"))
              (patches (search-patches "catdoc-CVE-2017-11110.patch"))
              (sha256
               (base32
                "15h7v3bmwfk4z8r78xs5ih6vd0pskn0rj90xghvbzdjj0cc88jji"))))
    (build-system gnu-build-system)
    ;; TODO: Also build `wordview` which requires `tk` – make a separate
    ;; package for this.
    (arguments
     '(#:tests? #f                      ; There are no tests
       #:configure-flags '("--disable-wordview")
       #:phases
       (modify-phases %standard-phases
         (add-before 'install 'fix-install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Create the man page directory ahead of the 'install' phase.
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/share/man/man1"))
               ;; Return #t explicitly rather than the value of 'mkdir-p'.
               #t))))))
    (home-page "http://www.wagner.pp.ru/~vitus/software/catdoc/")
    (synopsis "MS-Word to TeX or plain text converter")
    (description "@command{catdoc} extracts text from MS-Word files, trying to
preserve as many special printable characters as possible. It supports
everything up to Word-97. Also supported are MS Write documents and RTF files.

@command{catdoc} does not preserve complex word formatting, but it can
translate some non-ASCII characters into TeX escape codes. Its goal is to
extract plain text and allow you to read it and, probably, reformat with TeX,
according to TeXnical rules.

This package also provides @command{xls2csv}, which extracts data from Excel
spreadsheets and outputs it in comma-separated-value format, and
@command{catppt}, which extracts data from PowerPoint presentations.")
    (license license:gpl2+)))
455 | ||
f17a5447 AG |
(define-public utfcpp
  (package
    (name "utfcpp")
    (version "2.3.5")
    (home-page "https://github.com/nemtrif/utfcpp")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nemtrif/utfcpp/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0gcqcfw19kfim8xw29xdp91l310yfjyrqdj2zsx8xx02dkpy1zzk"))))
    (build-system cmake-build-system)
    (arguments
     `(#:out-of-source? #f
       #:phases
       (modify-phases %standard-phases
         ;; Upstream has no install target: copy the "source" directory into
         ;; include/ and the README into the documentation directory by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (headers-dir (string-append prefix "/include"))
                    (doc-dir (string-append prefix "/share/doc/" ,name)))
               (copy-recursively "source" headers-dir)
               (install-file "README.md" doc-dir)
               #t))))))
    (synopsis "Portable C++ library for handling UTF-8")
    (description "UTF8-CPP is a C++ library for handling UTF-8 encoded text
in a portable way.")
    (license license:boost1.0)))
8888fe82 | 487 | |
(define-public dbacl
  (package
    (name "dbacl")
    (version "1.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.lbreyer.com/gpl/"
                           name "-" version ".tar.gz"))
       (sha256
        (base32
         "0224g6x71hyvy7jikfxmgcwww1r5lvk0jx36cva319cb9nmrbrq7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list
        (string-append "-I" (assoc-ref %build-inputs "slang")
                       "/include/slang")
        (string-append "-I" (assoc-ref %build-inputs "ncurses")
                       "/include/ncurses"))
       #:phases
       (modify-phases %standard-phases
         ;; Remove the two sample texts that are not redistributed (see the
         ;; license comments below) together with every reference to them
         ;; and to the Japanese test script.
         (add-after 'unpack 'delete-sample6-and-japanese
           (lambda _
             (substitute* "doc/Makefile.am"
               (("sample6.txt") "")
               (("japanese.txt") ""))
             (delete-file "doc/sample6.txt")
             (delete-file "doc/japanese.txt")
             (substitute* (list "src/tests/Makefile.am"
                                "src/tests/Makefile.in")
               (("dbacl-jap.shin") "")
               (("dbacl-jap.sh") ""))
             #t))
         (add-after 'unpack 'delete-test
           ;; See comments about the license.
           (lambda _
             (delete-file "src/tests/dbacl-jap.shin")
             #t))
         ;; The Makefile.am files were edited above, so regenerate the build
         ;; system.
         (add-after 'delete-sample6-and-japanese 'autoreconf
           (lambda _
             (invoke "autoreconf" "-vif")
             #t))
         (add-after 'unpack 'fix-test-files
           (lambda _
             ;; Comment out the hard-coded PATH in the test script templates
             ;; and replace the tool names with the absolute file names
             ;; found via 'which'.
             ;; NOTE(review): the "diff" and "tr" patterns are unanchored, so
             ;; they also match inside longer words -- confirm this is
             ;; harmless for the affected files.
             (substitute* (find-files "src/tests/" "\\.shin$")
               (("PATH=/bin:/usr/bin")
                "#PATH=/bin:/usr/bin")
               (("diff") (which "diff"))
               (("tr") (which "tr")))
             #t)))))
    (inputs
     `(("ncurses" ,ncurses)
       ("perl" ,perl)
       ("readline" ,readline)
       ("slang" ,slang)))
    (native-inputs
     `(("libtool" ,libtool)
       ("autoconf" ,autoconf)
       ("automake" ,automake)
       ("pkg-config" ,pkg-config)))
    (home-page "https://www.lbreyer.com/dbacl.html")
    (synopsis "Bayesian text and email classifier")
    (description
     "dbacl is a fast Bayesian text and email classifier. It builds a variety
of language models using maximum entropy (minimum divergence) principles, and
these can then be used to categorize input data automatically among multiple
categories.")
    ;; The software is licensed as GPLv3 or later, but
    ;; includes various sample texts in the doc dir:
    ;; - sample1.txt, sample3 and sample5.txt are in the public domain,
    ;;   by Mark Twain.
    ;; - sample2.txt, sample4.txt are in the public domain, by Aristotle.
    ;; - sample6.txt is a forwarded email, copyright unknown.
    ;;   Guix does exclude sample6.txt.
    ;; - japanese.txt is a Japanese unofficial translation of the
    ;;   GNU General Public License, (c) by the Free Software Foundation.
    ;;   Guix excludes this file.
    (license (list license:gpl3+ license:public-domain))))
0905048a MB |
569 | |
(define-public dotconf
  (package
    (name "dotconf")
    (version "1.3")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/williamh/dotconf/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0lsnh0yaw44psmx59hq94cj1932gscp5h8d3cnh05l0svr0cy7kz"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; FIXME maketest.sh does not work.
       #:phases
       (modify-phases %standard-phases
         ;; Generate the build system right after unpacking.  'invoke'
         ;; raises an error when autoreconf fails, so the phase does not
         ;; rely on a boolean exit-status check.
         (add-after 'unpack 'autoreconf
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    (home-page "https://github.com/williamh/dotconf")
    (synopsis "Configuration file parser library")
    (description
     "C library for creating and parsing configuration files.")
    (license (list license:lgpl2.1      ; Main distribution.
                   license:asl1.1))))   ; src/readdir.{c,h}
ff3f6766 RW |
601 | |
(define-public java-rsyntaxtextarea
  (package
    (name "java-rsyntaxtextarea")
    (version "2.6.1")
    (home-page "https://bobbylight.github.io/RSyntaxTextArea/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bobbylight/RSyntaxTextArea/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0c5mqg2klj5rvf8fhycrli8rf6s37l9p7a8knw9gpp65r1c120q2"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "rsyntaxtextarea.jar"
       ;; FIXME: some tests fail because locale resources cannot be found.
       ;; Even when I add them to the class path,
       ;; RSyntaxTextAreaEditorKitDumbCompleteWordActionTest fails.
       #:tests? #f))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Syntax highlighting text component for Java Swing")
    (description "RSyntaxTextArea is a syntax highlighting, code folding text
component for Java Swing. It extends @code{JTextComponent} so it integrates
completely with the standard @code{javax.swing.text} package. It is fast and
efficient, and can be used in any application that needs to edit or view
source code.")
    (license license:bsd-3)))
03639d03 RW |
633 | |
634 | ;; We use the sources from git instead of the tarball from pypi, because the | |
635 | ;; latter does not include the Cython source file from which bycython.cpp is | |
636 | ;; generated. | |
(define-public python-editdistance
  (let ((revision "1")
        (commit "3ea84a7dd3258c76aa3be851ef3d50e59c886846"))
    (package
      (name "python-editdistance")
      ;; Upstream version, followed by the packaging revision and the
      ;; abbreviated commit.
      (version (string-append "0.3.1-" revision "." (string-take commit 7)))
      (home-page "https://www.github.com/aflc/editdistance")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/aflc/editdistance.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1l43svsv12crvzphrgi6x435z6xg8m086c64armp8wzb4l8ccm7g"))))
      (build-system python-build-system)
      (native-inputs
       `(("python-cython" ,python-cython)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Throw away the bundled bycython.cpp and regenerate it from
           ;; bycython.pyx with Cython in C++ mode.
           (add-after 'unpack 'build-cython-code
             (lambda _
               (with-directory-excursion "editdistance"
                 (delete-file "bycython.cpp")
                 (invoke "cython" "--cplus" "bycython.pyx")))))))
      (synopsis "Fast implementation of the edit distance (Levenshtein distance)")
      (description
       "This library simply implements Levenshtein distance algorithm with C++
and Cython.")
      (license license:expat))))
03f801aa CB |
670 | |
(define-public go-github.com-mattn-go-runewidth
  (package
    (name "go-github.com-mattn-go-runewidth")
    (version "0.0.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mattn/go-runewidth")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0vkrfrz3fzn5n6ix4k8s0cg0b448459sldq8bp4riavsxm932jzb"))))
    (build-system go-build-system)
    (arguments
     '(#:import-path "github.com/mattn/go-runewidth"))
    (synopsis "@code{runewidth} provides Go functions to work with string widths")
    (description
     "The @code{runewidth} library provides Go functions for padding,
measuring and checking the width of strings, with support for East Asian text.")
    ;; Point at this project's own repository (the same one the source is
    ;; fetched from), not at an unrelated package.
    (home-page "https://github.com/mattn/go-runewidth")
    (license license:expat)))
74fa77e9 PN |
693 | |
(define-public docx2txt
  (package
    (name "docx2txt")
    (version "1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/docx2txt/docx2txt/v"
                    version "/docx2txt-" version ".tgz"))
              (sha256
               (base32
                "06vdikjvpj6qdb41d8wzfnyj44jpnknmlgbhbr1w215420lpb5xj"))))
    (build-system gnu-build-system)
    (inputs
     `(("unzip" ,unzip)
       ("perl" ,perl)))
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags (list (string-append "BINDIR="
                                         (assoc-ref %outputs "out") "/bin")
                          (string-append "CONFIGDIR="
                                         (assoc-ref %outputs "out") "/etc")
                          ;; Makefile seems to be a bit dumb at guessing.
                          "INSTALL=install"
                          "PERL=perl")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'install 'fix-install
           (lambda* (#:key outputs inputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (config (string-append out "/etc/docx2txt.config"))
                    (unzip (assoc-ref inputs "unzip")))
               ;; According to INSTALL, the .sh wrapper can be skipped.
               (delete-file (string-append bin "/docx2txt.sh"))
               (rename-file (string-append bin "/docx2txt.pl")
                            (string-append bin "/docx2txt"))
               ;; Point the installed configuration at the 'unzip' input
               ;; instead of /usr/bin/unzip.
               (substitute* config
                 (("config_unzip => '/usr/bin/unzip',")
                  (string-append "config_unzip => '"
                                 unzip
                                 "/bin/unzip',")))
               ;; Makefile is wrong.
               (chmod config #o644)
               ;; Return #t explicitly rather than the unspecified value of
               ;; 'chmod'.
               #t))))))
    (synopsis "Recover text from @file{.docx} files, with good formatting")
    (description
     "@command{docx2txt} is a Perl based command line utility to convert
Microsoft Office @file{.docx} documents to equivalent text documents. Latest
version supports following features during text extraction.

@itemize
@item Character conversions; currency characters are converted to respective
names like Euro.
@item Capitalisation of text blocks.
@item Center and right justification of text fitting in a line of
(configurable) 80 columns.
@item Horizontal ruler, line breaks, paragraphs separation, tabs.
@item Indicating hyperlinked text along with the hyperlink (configurable).
@item Handling (bullet, decimal, letter, roman) lists along with (attempt at)
indentation.
@end itemize\n")
    (home-page "http://docx2txt.sourceforge.net")
    (license license:gpl3+)))