Commit | Line | Data |
---|---|---|
7939e70a TUBK |
1 | ;;; GNU Guix --- Functional package management for GNU |
2 | ;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer <taylanbayirli@gmail.com> | |
73092df4 | 3 | ;;; Copyright © 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net> |
a64a8c46 | 4 | ;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com> |
e522d840 | 5 | ;;; Copyright © 2015 Roel Janssen <roel@gnu.org> |
2d8cf0b3 | 6 | ;;; Copyright © 2016 Jelle Licht <jlicht@fsfe.org> |
f17a5447 | 7 | ;;; Copyright © 2016 Alex Griffin <a@ajgrf.com> |
3d0c0ae6 | 8 | ;;; Copyright © 2016, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il> |
3c986a7d | 9 | ;;; Copyright © 2016 Nikita <nikita@n0.is> |
07d6003b | 10 | ;;; Copyright © 2016, 2020 Marius Bakke <marius@gnu.org> |
5353cea0 | 11 | ;;; Copyright © 2017 Eric Bavier <bavier@member.fsf.org> |
7fdca77e | 12 | ;;; Copyright © 2017 Rene Saavedra <rennes@openmailbox.org> |
7577ab55 | 13 | ;;; Copyright © 2017,2019 Hartmut Goebel <h.goebel@crazy-compilers.com> |
3c8ba11a | 14 | ;;; Copyright © 2017 Kei Kebreau <kkebreau@posteo.net> |
bfcdf887 | 15 | ;;; Copyright © 2017 Alex Vong <alexvong1995@gmail.com> |
9a71213e | 16 | ;;; Copyright © 2018, 2019, 2020 Tobias Geerinckx-Rice <me@tobias.gr> |
4715f92e | 17 | ;;; Copyright © 2018 Pierre Neidhardt <mail@ambrevar.xyz> |
1a0363cf | 18 | ;;; Copyright © 2018 Meiyo Peng <meiyo.peng@gmail.com> |
933ac939 | 19 | ;;; Copyright © 2019 Yoshinori Arai <kumagusu08@gmail.com> |
f9488b70 | 20 | ;;; Copyright © 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de> |
24719e8a | 21 | ;;; Copyright © 2019 Wiktor Żelazny <wzelazny@vurv.cz> |
7f05493d | 22 | ;;; Copyright © 2020 Maxim Cournoyer <maxim.cournoyer@gmail.com> |
7939e70a TUBK |
23 | ;;; |
24 | ;;; This file is part of GNU Guix. | |
25 | ;;; | |
26 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
27 | ;;; under the terms of the GNU General Public License as published by | |
28 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
29 | ;;; your option) any later version. | |
30 | ;;; | |
31 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
32 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
33 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
34 | ;;; GNU General Public License for more details. | |
35 | ;;; | |
36 | ;;; You should have received a copy of the GNU General Public License | |
37 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
38 | ||
39 | (define-module (gnu packages textutils) | |
40 | #:use-module ((guix licenses) #:prefix license:) | |
41 | #:use-module (guix packages) | |
42 | #:use-module (guix download) | |
43 | #:use-module (guix git-download) | |
ff3f6766 | 44 | #:use-module (guix build-system ant) |
7939e70a | 45 | #:use-module (guix build-system gnu) |
03f801aa | 46 | #:use-module (guix build-system go) |
c8d969b5 | 47 | #:use-module (guix build-system cmake) |
03639d03 | 48 | #:use-module (guix build-system python) |
3a176be0 | 49 | #:use-module (guix utils) |
9116f126 | 50 | #:use-module (gnu packages) |
f571e1c3 | 51 | #:use-module (gnu packages autotools) |
a6baae74 | 52 | #:use-module (gnu packages base) |
148585c2 | 53 | #:use-module (gnu packages compression) |
f9488b70 | 54 | #:use-module (gnu packages gcc) |
28e20c53 | 55 | #:use-module (gnu packages golang) |
2f6e988d | 56 | #:use-module (gnu packages gettext) |
ff3f6766 | 57 | #:use-module (gnu packages java) |
8888fe82 | 58 | #:use-module (gnu packages ncurses) |
7f05493d | 59 | #:use-module (gnu packages pcre) |
8888fe82 | 60 | #:use-module (gnu packages perl) |
61 | #:use-module (gnu packages pkg-config) | |
f17a5447 | 62 | #:use-module (gnu packages python) |
2ee7524c | 63 | #:use-module (gnu packages python-build) |
44d10b1f | 64 | #:use-module (gnu packages python-xyz) |
8888fe82 | 65 | #:use-module (gnu packages readline) |
1506d491 EF |
66 | #:use-module (gnu packages slang) |
67 | #:use-module (gnu packages web)) | |
7939e70a | 68 | |
2f6e988d KK |
;; dos2unix: converter between DOS/Mac and Unix line endings.  The build has
;; no configure step, so the compiler and the installation prefix are handed
;; to make directly.
(define-public dos2unix
  (package
    (name "dos2unix")
    (version "7.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://waterlan.home.xs4all.nl/dos2unix/"
                           "dos2unix-" version ".tar.gz"))
       (sha256
        (base32 "00dfsf4rfyjb5j12gan8xjiirm0asshdz6dmd3l34a7ays6wadb0"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "gettext" gettext-minimal)
           (list "perl" perl)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Upstream provides no configure script.
         (delete 'configure))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "CC=" ,(cc-for-target))
               (string-append "prefix=" out)))))
    (synopsis "DOS/Mac to Unix and vice versa text file format converter")
    (description
     "dos2unix is a tool to convert line breaks in a text file from Unix format
to DOS format and vice versa.")
    (home-page "https://waterlan.home.xs4all.nl/dos2unix.html")
    (license license:bsd-2)))
97 | ||
7939e70a TUBK |
;; recode: character-set conversion library and its command-line front-end,
;; built from the release tarball published on GitHub.
(define-public recode
  (package
    (name "recode")
    (version "3.7.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/rrthomas/recode/releases/"
                           "download/v" version "/recode-" version ".tar.gz"))
       (sha256
        (base32 "0m59sd1ca0zw1aydpc3m8sw03nc885knmccqryg7byzmqs585ia6"))))
    (build-system gnu-build-system)
    ;; Python and Cython are needed at build time only.
    (native-inputs
     `(("python" ,python)
       ("python-cython" ,python-cython)))
    (home-page "https://github.com/rrthomas/recode")
    (synopsis "Text encoding converter")
    (description "The Recode library converts files between character sets and
usages. It recognises or produces over 200 different character sets (or about
300 if combined with an iconv library) and transliterates files between almost
any pair. When exact transliterations are not possible, it gets rid of
offending characters or falls back on approximations. The recode program is a
handy front-end to the library.")
    (license license:gpl3+)))
688fe865 TUBK |
122 | |
;; enca: charset detection library (libenca) plus its command-line tool,
;; fetched from the upstream Git repository at the tag named after the version.
(define-public enca
  (package
    (name "enca")
    (version "1.19")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (commit version)
             (url "https://github.com/nijel/enca")))
       (sha256
        (base32 "19q7cwwxmmk5j9438bsqdpjvdjawsd3zmw1zyqgi7s4m0rasr3ah"))
       (file-name (git-file-name name version))))
    (build-system gnu-build-system)
    ;; The recode input stays disabled: enca-1.19 tests fail with recent
    ;; recode.
    ;(inputs `(("recode" ,recode)))
    (synopsis "Text encoding detection tool")
    (description "Enca (Extremely Naive Charset Analyser) consists of libenca,
an encoding detection library, and enca, a command line frontend, integrating
libenca and several charset conversion libraries and tools.")
    (home-page "https://github.com/nijel/enca")
    (license license:gpl2)))
cd15ad82 RW |
145 | |
;; utf8proc: C library for Unicode processing of UTF-8 data.  There is no
;; configure script, so the compiler and prefix are passed as make flags, and
;; the Unicode test data that would otherwise be downloaded during "make
;; check" is supplied as native inputs and copied into data/ beforehand.
(define-public utf8proc
  (package
    (name "utf8proc")
    (version "2.5.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/JuliaStrings/utf8proc")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1xlkazhdnja4lksn5c9nf4bln5gjqa35a8gwlam5r0728w0h83qq"))))
    (build-system gnu-build-system)
    (native-inputs
     (let ((UNICODE_VERSION "13.0.0"))  ; defined in data/Makefile
       ;; Test data that is otherwise downloaded with curl.
       `(("NormalizationTest.txt"
          ,(origin
             (method url-fetch)
             (uri (string-append "https://www.unicode.org/Public/"
                                 UNICODE_VERSION "/ucd/NormalizationTest.txt"))
             (sha256
              (base32 "07g0ya4f6zfzvpp24ccxkb2yq568kh83gls85rjl950nv5fya3nn"))))
         ("GraphemeBreakTest.txt"
          ,(origin
             (method url-fetch)
             (uri (string-append "https://www.unicode.org/Public/"
                                 UNICODE_VERSION
                                 "/ucd/auxiliary/GraphemeBreakTest.txt"))
             (sha256
              (base32 "07f8rrvcsq4pibdz6zxggxy8w7zjjqyw2ggclqlhalyv45yv7prj"))))

         ;; For tests.
         ("perl" ,perl))))
    (arguments
     ;; Use the target-aware compiler name rather than a hard-coded "gcc" so
     ;; that cross-compilation picks the right tool.
     `(#:make-flags (list (string-append "CC=" ,(cc-for-target))
                          (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; no configure script
         (add-before 'check 'check-data
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Put the pre-fetched Unicode test files where the test suite
             ;; looks for them.
             (for-each (lambda (i)
                         (copy-file (assoc-ref inputs i)
                                    (string-append "data/" i)))
                       '("NormalizationTest.txt" "GraphemeBreakTest.txt"))
             ;; Replace the non-ASCII break markers with ASCII equivalents.
             (substitute* "data/GraphemeBreakTest.txt"
               (("÷") "/")
               (("×") "+"))
             #t)))))
    (home-page "https://juliastrings.github.io/utf8proc/")
    (synopsis "C library for processing UTF-8 Unicode data")
    (description "utf8proc is a small C library that provides Unicode
normalization, case-folding, and other operations for data in the UTF-8
encoding, supporting Unicode version 13.0.0.")
    (license license:expat)))
f571e1c3 | 203 | |
205df739 TGR |
;; libconfuse: configuration file parser library, built without static
;; libraries.
(define-public libconfuse
  (package
    (name "libconfuse")
    (version "3.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/martinh/libconfuse/releases/download/v"
             version "/confuse-" version ".tar.xz"))
       (sha256
        (base32 "043hqqykpprgrkw9s2hbdlxr308a7yxwsgxj4m8aadg1401hmm8x"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags '("--disable-static")))
    (synopsis "Configuration file parser library")
    (description "libconfuse is a configuration file parser library. It
supports sections and (lists of) values (strings, integers, floats, booleans
or other sections), as well as some other features (such as
single/double-quoted strings, environment variable expansion, functions and
nested include statements).")
    (home-page "https://github.com/martinh/libconfuse")
    (license license:isc)))
227 | ||
f571e1c3 RW |
;; libgtextutils: text utility library used by the fastx toolkit.  It is built
;; with GCC 5, which requires hiding the default GCC's C++ headers.
(define-public libgtextutils
  (package
    (name "libgtextutils")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/libgtextutils/releases/download/"
             version "/libgtextutils-" version ".tar.gz"))
       (sha256
        (base32 "0jiybkb2z58wa2msvllnphr4js2hvjvh988pavb3mzkgr6ihwbkr"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "autoconf" autoconf)
           (list "automake" automake)
           (list "gcc@5" gcc-5)         ; doesn't build with later versions
           (list "libtool" libtool)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Regenerate the build system with the "reconf" script from the
         ;; source tree.
         (replace 'bootstrap
           (lambda _ (invoke "sh" "reconf")))
         (add-after 'set-paths 'hide-default-gcc
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop the default GCC's C++ header directory from
             ;; CPLUS_INCLUDE_PATH to prevent conflicts with the GCC 5 input.
             (let* ((unwanted (string-append (assoc-ref inputs "gcc")
                                             "/include/c++"))
                    (entries (string-split (getenv "CPLUS_INCLUDE_PATH") #\:))
                    (kept (filter (lambda (dir)
                                    (not (string=? dir unwanted)))
                                  entries)))
               (setenv "CPLUS_INCLUDE_PATH" (string-join kept ":"))
               #t))))))
    (synopsis "Gordon's text utils library")
    (description
     "libgtextutils is a text utilities library used by the fastx toolkit from
the Hannon Lab.")
    (home-page "https://github.com/agordon/libgtextutils")
    (license license:agpl3+)))
aae2b445 BW |
268 | |
;; cityhash: Google's string hash functions, built from a pinned upstream
;; commit.  The extra citycrc.h header is installed manually because the
;; default install target omits it.
(define-public cityhash
  (let ((commit "8af9b8c"))
    (package
      (name "cityhash")
      ;; Yields "1.1-2.8af9b8c", the same string as the previous hand-built
      ;; version.
      (version (git-version "1.1" "2" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/google/cityhash")
                      (commit commit)))
                ;; The source is a Git checkout, not a tarball, so name it
                ;; like the other git-fetch origins in this module.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0n6skf5dv8yfl1ckax8dqhvsbslkwc9158zf2ims0xqdvzsahbi6"))))
      (build-system gnu-build-system)
      (arguments
       '(#:make-flags (list "CXXFLAGS=-g -O3")
         #:phases
         (modify-phases %standard-phases
           ;; citycrc is not installed by default but is used by some
           ;; programs.
           (add-after 'install 'install-citycrc
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (include (string-append out "/include")))
                 (install-file "src/citycrc.h" include))
               #t)))))
      (home-page "https://github.com/google/cityhash")
      (synopsis "C++ hash functions for strings")
      (description
       "CityHash provides hash functions for strings. The functions mix the
input bits thoroughly but are not suitable for cryptography.")
      (license license:expat))))
e522d840 | 302 | |
9128db21 RW |
;; ustr: low-overhead C string library.  There is no configure script; the
;; build is driven entirely through make variables.
(define-public ustr
  (package
    (name "ustr")
    (version "1.0.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.and.org/ustr/" version
                                  "/ustr-" version ".tar.bz2"))
              (sha256
               (base32
                "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx"))
              (patches (search-patches "ustr-fix-build-with-gcc-5.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ;; Use the target-aware compiler name instead of a hard-coded "gcc",
       ;; as dos2unix does, so that cross-compilation selects the right tool.
       (list (string-append "CC=" ,(cc-for-target))
             "HIDE="
             ;; Override "/sbin/ldconfig" with "echo" because we don't need
             ;; "ldconfig".
             "LDCONFIG=echo"
             (string-append "prefix=" (assoc-ref %outputs "out"))
             "all-shared")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'disable-check-for-stdint
           (lambda _
             ;; Of course we have stdint.h, just not in /usr/include
             (substitute* '("Makefile"
                            "ustr-import.in")
               (("-f \"/usr/include/stdint.h\"") "-z \"\""))
             #t))
         ;; No configure script
         (delete 'configure))))
    (home-page "http://www.and.org/ustr/")
    (synopsis "String library with very low memory overhead")
    (description
     "Ustr is a string library for C with very low memory overhead.")
    ;; Quoted from the home page: "The License for the code is MIT, new-BSD,
    ;; LGPL, etc. ... if you need another license to help compatibility, just
    ;; ask for it.  It's basically public domain, without all the legal
    ;; problems for everyone that trying to make something public domain
    ;; entails."
    (license license:public-domain)))
346 | ||
7577ab55 HG |
;; ascii2binary: the ascii2binary/binary2ascii pair of converters between
;; textual and binary number representations.
(define-public ascii2binary
  (package
    (name "ascii2binary")
    (version "2.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://billposer.org/Software/Downloads/"
                           "ascii2binary-" version ".tar.bz2"))
       (sha256
        (base32 "0dc9fxcdmppbs9s06jvq61zbk552laxps0xyk098gj41697ihd96"))))
    (build-system gnu-build-system)
    (native-inputs
     (list (list "gettext" gettext-minimal)))
    (synopsis "Convert between ASCII, hexadecimal and binary representations")
    (description "The two programs are useful for generating test data, for
inspecting binary files, and for interfacing programs that generate textual
output to programs that require binary input and conversely. They can also be
useful when it is desired to reformat numbers.

@itemize

@item @command{ascii2binary} reads input consisting of ascii or hexadecimal
representation numbers separated by whitespace and produces as output
the binary equivalents. The type and precision of the binary output
is selected using command line flags.

@item @command{binary2ascii} reads input consisting of binary numbers
and converts them to their ascii or hexadecimal representation.
Command line flags specify the type and size of the binary numbers
and provide control over the format of the output.
Unsigned integers may be written out in binary, octal, decimal,
or hexadecimal.

Signed integers may be written out only in binary or decimal. Floating
point numbers may be written out only decimal, either in standard or
scientific notation. (If you want to examine the binary representation
of floating point numbers, just treat the input as a sequence of unsigned
characters.)

@end itemize")
    (home-page "https://billposer.org/Software/a2b.html")
    (license license:gpl3)))
390 | ||
;; uniutils: command-line tools for inspecting Unicode text.  After the build,
;; the utf8lookup script is patched so that the helper programs it calls are
;; referenced by absolute store paths.
(define-public uniutils
  (package
    (name "uniutils")
    (version "2.27")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://billposer.org/Software/Downloads/"
                           "uniutils-" version ".tar.bz2"))
       (sha256
        (base32 "19w1510w87gx7n4qy3zsb0m467a4rn5scvh4ajajg7jh6x5xri08"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags '("--disable-dependency-tracking")
       #:phases
       (modify-phases %standard-phases
         (add-after 'build 'fix-paths
           (lambda* (#:key outputs inputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (a2b (assoc-ref inputs "ascii2binary"))
                   (iconv (assoc-ref inputs "libiconv")))
               ;; Only commands at the start of a line are rewritten.
               (substitute* "utf8lookup"
                 (("^ascii2binary ") (string-append a2b "/bin/ascii2binary "))
                 (("^uniname ") (string-append out "/bin/uniname "))
                 (("^iconv ") (string-append iconv "/bin/iconv ")))
               #t))))))
    (inputs
     `(("ascii2binary" ,ascii2binary)
       ("libiconv" ,libiconv)))
    (home-page "https://billposer.org/Software/unidesc.html")
    (synopsis "Find out what is in a Unicode file")
    (description "Useful tools when working with Unicode files when one
doesn't know the writing system, doesn't have the necessary font, needs to
inspect invisible characters, needs to find out whether characters have been
combined or in what order they occur, or needs statistics on which characters
occur.

@itemize

@item @command{uniname} defaults to printing the character offset of each
character, its byte offset, its hex code value, its encoding, the glyph
itself, and its name. It may also be used to validate UTF-8 input.

@item @command{unidesc} reports the character ranges to which different
portions of the text belong. It can also be used to identify Unicode encodings
(e.g. UTF-16be) flagged by magic numbers.

@item @command{unihist} generates a histogram of the characters in its input.

@item @command{ExplicateUTF8} is intended for debugging or for learning about
Unicode. It determines and explains the validity of a sequence of bytes as a
UTF8 encoding.

@item @command{utf8lookup} provides a handy way to look up Unicode characters
from the command line.

@item @command{unireverse} reverses each line of UTF-8 input
character-by-character.

@end itemize")
    (license license:gpl3)))
452 | ||
e522d840 RJ |
;; libconfig: C/C++ library for structured configuration files.  The release
;; tarball is hosted under the project's home page, so HOME-PAGE is declared
;; before SOURCE and reused to build the download URI.
(define-public libconfig
  (package
    (name "libconfig")
    (version "1.7.2")
    (home-page "https://hyperrealm.github.io/libconfig/")
    (source (origin
              (method url-fetch)
              ;; HOME-PAGE already ends with a slash; do not add another one,
              ;; which would produce ".../libconfig//dist/...".
              (uri (string-append home-page "dist/libconfig-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1ngs2qx3cx5cbwinc5mvadly0b5n7s86zsc68c404czzfff7lg3w"))))
    (build-system gnu-build-system)
    (synopsis "C/C++ configuration file library")
    (description
     "Libconfig is a simple library for manipulating structured configuration
files. This file format is more compact and more readable than XML. And
unlike XML, it is type-aware, so it is not necessary to do string parsing in
application code.")
    (license license:lgpl2.1+)))
c8d969b5 PP |
473 | |
;; pfff: probabilistic fast file fingerprinting tool, built with CMake from
;; the upstream Git tag "v" + version.
(define-public pfff
  (package
    (name "pfff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (commit (string-append "v" version))
             (url "https://github.com/pfff/pfff")))
       (sha256
        (base32
         "1nxkfm7zliq3rmr7yp871sppwfnz71iz364m2sgazny71pzykggc"))
       (file-name (git-file-name name version))))
    (build-system cmake-build-system)
    (synopsis "Probabilistic fast file fingerprinting tool")
    (description
     "pfff is a tool for calculating a compact digital fingerprint of a file
by sampling randomly from the file instead of reading it in full.
Consequently, the computation has a flat performance characteristic,
correlated with data variation rather than file size. pfff can be as reliable
as existing hashing techniques, with provably negligible risk of collisions.")
    (home-page "https://biit.cs.ut.ee/pfff/")
    (license license:bsd-3)))
2d8cf0b3 JL |
497 | |
;; oniguruma: regular expression library, built without static libraries.
(define-public oniguruma
  (package
    (name "oniguruma")
    (version "6.9.5-rev1")
    (source
     (origin
       (method url-fetch)
       (uri
        ;; The release tag in the URI is the version with every "-" turned
        ;; into "_"; the tarball name itself uses the version unchanged.
        (let ((tag (string-map (lambda (ch)
                                 (if (char=? ch #\-) #\_ ch))
                               version)))
          (string-append "https://github.com/kkos/"
                         "oniguruma/releases/download/v"
                         tag
                         "/onig-" version ".tar.gz")))
       (sha256
        (base32
         "17m92k1n6bvza6m35fpd5g36zwpwm3hfz3478iwj5bvj2sfq8g6k"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags '("--disable-static")))
    (synopsis "Regular expression library")
    (description "Oniguruma is a regular expressions library. The special
characteristic of this library is that different character encoding for every
regular expression object can be specified.")
    (home-page "https://github.com/kkos/oniguruma")
    (license license:bsd-2)))
1b90e57e RW |
522 | |
;; antiword: Microsoft Word document reader.  The build uses Makefile.Linux
;; directly; "configure" is replaced by a source patch and "install" by the
;; makefile's global_install target.
(define-public antiword
  (package
    (name "antiword")
    (version "0.37")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.winfield.demon.nl/linux"
                           "/antiword-" version ".tar.gz"))
       (sha256
        (base32
         "1b7mi1l20jhj09kyh0bq14qzz8vdhhyf35gzwsq43mn6rc7h0b4f"))
       (patches (search-patches "antiword-CVE-2014-8123.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "-f" "Makefile.Linux"
               (string-append "GLOBAL_INSTALL_DIR=" out "/bin")
               (string-append "GLOBAL_RESOURCES_DIR=" out "/share/antiword")))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Point the program at the mapping files in this package's own
             ;; data directory instead of /usr/share.
             (let ((data-dir (string-append (assoc-ref outputs "out")
                                            "/share/antiword")))
               (substitute* "antiword.h"
                 (("/usr/share/antiword") data-dir))
               #t)))
         (replace 'install
           (lambda* (#:key make-flags #:allow-other-keys)
             ;; Run the global_install target with the same make flags as the
             ;; build.
             (apply invoke "make" "global_install" make-flags))))))
    (synopsis "Microsoft Word document reader")
    (description "Antiword is an application for displaying Microsoft Word
documents. It can also convert the document to PostScript or XML. Only
documents made by MS Word version 2 and version 6 or later are supported. The
name comes from: \"The antidote against people who send Microsoft Word files
to everybody, because they believe that everybody runs Windows and therefore
runs Word\".")
    (home-page "http://www.winfield.demon.nl/")
    (license license:gpl2+)))
f17a5447 | 566 | |
d0abaf89 HG |
;; catdoc: text extractors for MS Word, Excel and PowerPoint files.  The
;; Tk-based wordview front-end is disabled.
(define-public catdoc
  (package
    (name "catdoc")
    (version "0.95")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ftp.wagner.pp.ru/pub/catdoc/"
                                  "catdoc-" version ".tar.gz"))
              (patches (search-patches "catdoc-CVE-2017-11110.patch"))
              (sha256
               (base32
                "15h7v3bmwfk4z8r78xs5ih6vd0pskn0rj90xghvbzdjj0cc88jji"))))
    (build-system gnu-build-system)
    ;; TODO: Also build `wordview` which requires `tk` – make a separate
    ;; package for this.
    (arguments
     '(#:tests? #f                      ; There are no tests
       #:configure-flags '("--disable-wordview")
       #:phases
       (modify-phases %standard-phases
         (add-before 'install 'fix-install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Create the man page directory before the install phase runs.
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/share/man/man1"))))))))
    (home-page "https://www.wagner.pp.ru/~vitus/software/catdoc/")
    (synopsis "MS-Word to TeX or plain text converter")
    (description "@command{catdoc} extracts text from MS-Word files, trying to
preserve as many special printable characters as possible. It supports
everything up to Word-97. Also supported are MS Write documents and RTF files.

@command{catdoc} does not preserve complex word formatting, but it can
translate some non-ASCII characters into TeX escape codes. Its goal is to
extract plain text and allow you to read it and, probably, reformat with TeX,
according to TeXnical rules.

This package also provides @command{xls2csv}, which extracts data from Excel
spreadsheets and outputs it in comma-separated-value format, and
@command{catppt}, which extracts data from PowerPoint presentations.")
    (license license:gpl2+)))
606 | ||
f17a5447 AG |
;; utfcpp: C++ library for UTF-8 handling.  There is no install target, so the
;; "source" directory and the README are copied into place by hand.
(define-public utfcpp
  (package
    (name "utfcpp")
    (version "2.3.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (commit (string-append "v" version))
             (url "https://github.com/nemtrif/utfcpp")))
       (sha256
        (base32
         "1gr98d826z6wa58r1s5i7rz7q2x3r31v7zj0pjjlrc7gfxwklr4s"))
       (file-name (git-file-name name version))))
    (build-system cmake-build-system)
    (arguments
     `(#:out-of-source? #f
       #:phases
       (modify-phases %standard-phases
         (replace 'install              ; no install target
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               ;; The contents of "source" become the include directory.
               (copy-recursively "source"
                                 (string-append prefix "/include"))
               (install-file "README.md"
                             (string-append prefix "/share/doc/" ,name))
               #t))))))
    (synopsis "Portable C++ library for handling UTF-8")
    (description "UTF8-CPP is a C++ library for handling UTF-8 encoded text
in a portable way.")
    (home-page "https://github.com/nemtrif/utfcpp")
    (license license:boost1.0)))
8888fe82 | 638 | |
;; dbacl: Bayesian text and email classifier.  The build system is
;; regenerated with autoreconf after some sample documents and the Japanese
;; test are removed from the tree (see the license comment at the end).
(define-public dbacl
  (package
    (name "dbacl")
    (version "1.14.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/dbacl/dbacl/" version "/"
                           "dbacl-" version ".tar.gz"))
       (sha256
        (base32 "1gas0112wqjvwn9qg3hxnawk7h3prr0w9b2h68f3p1ifd1kzn3gz"))
       (patches (search-patches "dbacl-include-locale.h.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list
        (string-append "-I" (assoc-ref %build-inputs "slang")
                       "/include/slang")
        (string-append "-I" (assoc-ref %build-inputs "ncurses")
                       "/include/ncurses"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'delete-sample6-and-japanese
           (lambda _
             ;; Drop the two excluded documents from the doc makefile and from
             ;; the source tree, and drop the Japanese test from the test
             ;; makefiles.
             (substitute* "doc/Makefile.am"
               (("sample6.txt") "")
               (("japanese.txt") ""))
             (delete-file "doc/sample6.txt")
             (delete-file "doc/japanese.txt")
             (substitute* (list "src/tests/Makefile.am"
                                "src/tests/Makefile.in")
               (("dbacl-jap.shin") "")
               (("dbacl-jap.sh") ""))
             #t))
         (add-after 'unpack 'delete-test
           ;; See comments about the license.
           (lambda _
             (delete-file "src/tests/dbacl-jap.shin")
             #t))
         (add-after 'unpack 'fix-test-files
           (lambda _
             ;; Comment out the hard-coded PATH and use absolute file names
             ;; for the tools the test scripts call.
             ;; NOTE(review): the "diff" and "tr" patterns are unanchored and
             ;; match inside longer words too -- confirm this is intended.
             (substitute* (find-files "src/tests/" "\\.shin$")
               (("PATH=/bin:/usr/bin")
                "#PATH=/bin:/usr/bin")
               (("diff") (which "diff"))
               (("tr") (which "tr")))
             #t))
         (replace 'bootstrap
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))))
    (inputs
     `(("ncurses" ,ncurses)
       ("perl" ,perl)
       ("readline" ,readline)
       ("slang" ,slang)))
    (native-inputs
     `(("libtool" ,libtool)
       ("autoconf" ,autoconf)
       ("automake" ,automake)
       ("pkg-config" ,pkg-config)))
    (home-page "https://www.lbreyer.com/dbacl.html")
    (synopsis "Bayesian text and email classifier")
    (description
     "dbacl is a fast Bayesian text and email classifier. It builds a variety
of language models using maximum entropy (minimum divergence) principles, and
these can then be used to categorize input data automatically among multiple
categories.")
    ;; The software is licensed as GPLv3 or later, but
    ;; includes various sample texts in the doc dir:
    ;; - sample1.txt, sample3 and sample5.txt are in the public domain,
    ;;   by Mark Twain.
    ;; - sample2.txt, sample4.txt are in the public domain, by Aristotle.
    ;; - sample6.txt is a forwarded email, copyright unknown.
    ;;   Guix does exclude sample6.txt.
    ;; - japanese.txt is a Japanese unofficial translation of the
    ;;   GNU General Public License, (c) by the Free Software Foundation.
    ;;   Guix excludes this file.
    (license (list license:gpl3+ license:public-domain))))
0905048a MB |
720 | |
;; dotconf: C library for creating and parsing configuration files, built
;; from the upstream Git tag "v<version>".
(define-public dotconf
  (package
    (name "dotconf")
    (version "1.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/williamh/dotconf")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1sc95hw5k2xagpafny0v35filmcn05k1ds5ghkldfpf6xw4hakp7"))))
    (build-system gnu-build-system)
    ;; FIXME: Tests are disabled because maketest.sh does not work.
    (arguments
     '(#:tests? #f))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    (home-page "https://github.com/williamh/dotconf")
    (synopsis "Configuration file parser library")
    (description
     "C library for creating and parsing configuration files.")
    (license
     (list
      ;; Main distribution.
      license:lgpl2.1
      ;; src/readdir.{c,h}
      license:asl1.1))))
ff3f6766 | 746 | |
7f05493d MC |
;; drm-tools: collection of command-line utilities for text and binary files,
;; built with CMake from the SourceForge release tarball.
(define-public drm-tools
  (package
    (name "drm-tools")
    (version "1.1.33")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/drmtools/drm_tools-"
                           version ".tar.gz"))
       (sha256
        (base32 "187zbxw21zcg8gpyc13gxlycfw0n05a6rmqq6im5wr9zk1v1wj80"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;the test suite fails
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-install-prefixes
           ;; Replace the test installation directory in CMakeLists.txt with
           ;; the output directory (minus its leading slash) and install the
           ;; man pages under share/man.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (substitute* "CMakeLists.txt"
                 (("tmp/testinstall")
                  (string-drop prefix 1))
                 (("/man/man1")
                  "/share/man/man1"))
               #t)))
         (add-after 'unpack 'adjust-test-paths
           ;; Make the test scripts call the tools by bare name instead of
           ;; through relative or absolute file names.
           (lambda _
             (substitute* (list "test_extract_increment.sh"
                                "test_extract_features.sh"
                                "test_extract_features2.sh"
                                "test_dmath.sh")
               (("\\./extract") "extract")
               (("\\./dmath") "dmath")
               (("/usr/local/bin/") "")
               (("/bin/rm") "rm")
               (("/bin/cp") "cp"))
             #t))
         ;; The stock check phase is replaced by the one added after 'build
         ;; below.
         (delete 'check)
         ;; The produced binaries are written directly to %output/bin.
         (delete 'install)
         (add-after 'build 'check
           (lambda* (#:key outputs tests? #:allow-other-keys)
             (when tests?
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin")))
                 ;; Put the freshly built tools first on PATH, then run the
                 ;; test driver from the source directory.
                 (setenv "PATH"
                         (string-append bindir ":" (getenv "PATH")))
                 (with-directory-excursion
                     (format #f "../drm_tools-~a" ,version)
                   (invoke "sh" "test_all.sh")))))))))
    (native-inputs
     `(("which" ,which)))               ;for tests
    (inputs
     `(("pcre" ,pcre)))
    (home-page "http://drmtools.sourceforge.net/")
    (synopsis "Utilities to manipulate text and binary files")
    (description "The drm_tools package contains the following commands:
@table @command
@item accudate
An extended version of the \"date\" program that has sub-second accuracy.
@item binformat
Format complex binary data into text.
@item binload
Load data into a binary file using simple commands from the input.
@item binorder
Sort, merge, search, retrieve or generate test data consisting of fixed size
binary records.
@item binreplace
Find or find/replace in binary files.
@item binsplit
Split test data consisting of fixed size binary records into one or more
output streams.
@item chardiff
Find changes between two files at the character level. Unlike \"diff\", it
lists just the characters that differ, so if the 40,000th character is
different only that one character will be shown, not the entire line.
@item columnadd
Add columns of integers, decimals, and/or times.
@item datasniffer
A utility for formatting binary data dumps.
@item dmath
Double precision interactive command line math calculator.
@item extract
Extract and emit data from text files based on character or token position.
@item execinput
A utility that reads from STDIN and executes each line as a command in a
sub-process.
@item indexed_text
A utility for rapid retrieval of text by line numbers, in any order, from a
text file.
@item mdump
Format binary data.
@item msgqueue
Create message queues and send/receive messages.
@item mbin
@itemx mbout
Multiple buffer in and out. Used for buffering a lot of data between a slow
device and a fast device. Mostly for buffering streaming tape drives for use
with slower network connections, so that streaming is maintained as much as
possible to minimize wear on the tape device.
@item pockmark
Corrupt data streams - useful for testing error correction and data recovery.
@item tarsieve
Filter, list, or split a tar file.
@end table")
    (license license:gpl2+)))
850 | ||
ff3f6766 RW |
;; java-rsyntaxtextarea: syntax highlighting text component for Java Swing,
;; built with Ant from the upstream Git tag named after the version.
(define-public java-rsyntaxtextarea
  (package
    (name "java-rsyntaxtextarea")
    (version "2.6.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bobbylight/RSyntaxTextArea")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0dyflzvxq2wvs0rgqfyi5yzzrb6r4bzw2dm8cl304dakxk38ddys"))))
    (build-system ant-build-system)
    (arguments
     '(#:jar-name "rsyntaxtextarea.jar"
       ;; FIXME: some tests fail because locale resources cannot be found.
       ;; Even when I add them to the class path,
       ;; RSyntaxTextAreaEditorKitDumbCompleteWordActionTest fails.
       #:tests? #f))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://bobbylight.github.io/RSyntaxTextArea/")
    (synopsis "Syntax highlighting text component for Java Swing")
    (description "RSyntaxTextArea is a syntax highlighting, code folding text
component for Java Swing. It extends @code{JTextComponent} so it integrates
completely with the standard @code{javax.swing.text} package. It is fast and
efficient, and can be used in any application that needs to edit or view
source code.")
    (license license:bsd-3)))
03639d03 RW |
882 | |
;; We use the sources from git instead of the tarball from pypi, because the
;; latter does not include the Cython source file from which bycython.cpp is
;; generated.
(define-public python-editdistance
  (let ((commit "3ea84a7dd3258c76aa3be851ef3d50e59c886846")
        (revision "1"))
    (package
      (name "python-editdistance")
      ;; Same string as the former hand-written
      ;; "0.3.1-<revision>.<first 7 characters of commit>".
      (version (git-version "0.3.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/aflc/editdistance")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1l43svsv12crvzphrgi6x435z6xg8m086c64armp8wzb4l8ccm7g"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'build-cython-code
             ;; Discard the pre-generated C++ file and regenerate it from
             ;; the Cython source.
             (lambda _
               (with-directory-excursion "editdistance"
                 (delete-file "bycython.cpp")
                 (invoke "cython" "--cplus" "bycython.pyx")))))))
      (native-inputs
       `(("python-cython" ,python-cython)))
      ;; Use the same host as the source URL above.
      (home-page "https://github.com/aflc/editdistance")
      (synopsis "Fast implementation of the edit distance (Levenshtein distance)")
      (description
       "This library simply implements Levenshtein distance algorithm with C++
and Cython.")
      (license license:expat))))
03f801aa CB |
919 | |
;; go-runewidth: Go library for measuring and padding strings by display
;; width, pinned to a specific upstream commit.
(define-public go-github.com-mattn-go-runewidth
  (let ((commit "703b5e6b11ae25aeb2af9ebb5d5fdf8fa2575211")
        (version "0.0.4")
        (revision "1"))
    (package
      (name "go-github.com-mattn-go-runewidth")
      (version (git-version version revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               ;; The repository name matches the #:import-path below.
               (url "https://github.com/mattn/go-runewidth")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0znpyz71gajx3g0j2zp63nhjj2c07g16885vxv4ykwnrfmzbgk4w"))))
      (build-system go-build-system)
      (arguments
       '(#:import-path "github.com/mattn/go-runewidth"))
      (home-page "https://github.com/mattn/go-runewidth")
      (synopsis "@code{runewidth} provides Go functions to work with string widths")
      (description
       "The @code{runewidth} library provides Go functions for padding,
measuring and checking the width of strings, with support for East Asian
text.")
      (license license:expat))))
74fa77e9 PN |
947 | |
;; docx2txt: Perl command-line utility that converts .docx documents to plain
;; text.  There is no configure script and no test suite; everything is
;; driven by make variables.
(define-public docx2txt
  (package
    (name "docx2txt")
    (version "1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/docx2txt/docx2txt/v"
             version "/docx2txt-" version ".tgz"))
       (sha256
        (base32
         "06vdikjvpj6qdb41d8wzfnyj44jpnknmlgbhbr1w215420lpb5xj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags (list (string-append "BINDIR="
                                         (assoc-ref %outputs "out") "/bin")
                          (string-append "CONFIGDIR="
                                         (assoc-ref %outputs "out") "/etc")
                          ;; Makefile seems to be a bit dumb at guessing.
                          "INSTALL=install"
                          "PERL=perl")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'install 'fix-install
           ;; Tidy the installed files: drop the shell wrapper, give the
           ;; Perl script its final name, and point the configuration file
           ;; at the unzip executable of the "unzip" input.
           (lambda* (#:key outputs inputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (config (string-append out "/etc/docx2txt.config"))
                    (unzip (assoc-ref inputs "unzip")))
               ;; According to INSTALL, the .sh wrapper can be skipped.
               (delete-file (string-append bin "/docx2txt.sh"))
               (rename-file (string-append bin "/docx2txt.pl")
                            (string-append bin "/docx2txt"))
               (substitute* config
                 (("config_unzip => '/usr/bin/unzip',")
                  (string-append "config_unzip => '"
                                 unzip
                                 "/bin/unzip',")))
               ;; Makefile is wrong.
               (chmod config #o644)
               #t))))))
    (inputs
     `(("unzip" ,unzip)
       ("perl" ,perl)))
    (home-page "http://docx2txt.sourceforge.net")
    (synopsis "Recover text from @file{.docx} files, with good formatting")
    (description
     "@command{docx2txt} is a Perl based command line utility to convert
Microsoft Office @file{.docx} documents to equivalent text documents. Latest
version supports following features during text extraction.

@itemize
@item Character conversions; currency characters are converted to respective
names like Euro.
@item Capitalisation of text blocks.
@item Center and right justification of text fitting in a line of
(configurable) 80 columns.
@item Horizontal ruler, line breaks, paragraphs separation, tabs.
@item Indicating hyperlinked text along with the hyperlink (configurable).
@item Handling (bullet, decimal, letter, roman) lists along with (attempt at)
indentation.
@end itemize\n")
    (license license:gpl3+)))
1a0363cf | 1013 | |
bc5aa386 VC |
;; odt2txt: command-line converter from OpenDocument Text to plain text,
;; built from the upstream Git tag "v<version>" with plain make.
(define-public odt2txt
  (package
    (name "odt2txt")
    (version "0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dstosberg/odt2txt/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0im3kzvhxkjlx57w6h13mc9584c74ma1dyymgvpq2y61av3gc35v"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no make check
       ;; Ask Guix for the compiler name instead of hard-coding "gcc", so
       ;; that cross-compilation picks up the target's compiler.
       #:make-flags (list ,(string-append "CC=" (cc-for-target))
                          (string-append "DESTDIR=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/dstosberg/odt2txt/")
    (synopsis "Converter from OpenDocument Text to plain text")
    (description "odt2txt is a command-line tool which extracts the text out
of OpenDocument Texts, as produced by OpenOffice.org, KOffice, StarOffice and
others.

odt2txt can also extract text from some file formats similar to OpenDocument
Text, such as OpenOffice.org XML (*.sxw), which was used by OpenOffice.org
version 1.x and older StarOffice versions. To a lesser extent, odt2txt may be
useful to extract content from OpenDocument spreadsheets (*.ods) and
OpenDocument presentations (*.odp).")
    (license license:gpl2)))
1051 | ||
1a0363cf MP |
;; opencc: converter between Traditional and Simplified Chinese.  The C++
;; library is built in-tree with CMake, after which the Python binding is
;; installed from the "python" sub-directory of the same checkout.
(define-public opencc
  (package
    (name "opencc")
    (version "1.1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BYVoid/OpenCC")
             (commit (string-append "ver." version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "07y0pvxjlmpcnym229j87qcfwnfm7vi10dad0a20xy6as55a9j3d"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; TODO: Unbundle tclap, darts-clone, gtest
           (delete-file-recursively "deps/rapidjson-1.1.0")
           #t))))
    (build-system cmake-build-system)
    (arguments
     ;; Required to locate the install script properly.
     `(#:out-of-source? #f
       #:parallel-build? #f             ;occasionally failed.
       #:imported-modules
       (,@%cmake-build-system-modules
        (guix build python-build-system))
       #:modules ((guix build cmake-build-system)
                  ((guix build python-build-system) #:prefix python:)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'prevent-rebuild-during-installation
           ;; Remove the custom 'build_py' command entry from setup.py.
           (lambda _
             (substitute* "python/setup.py"
               (("'build_py': BuildPyCommand,") ""))
             #t))
         (add-after 'unpack 'patch-3rd-party-references
           ;; The bundled rapidjson is deleted by the source snippet; use the
           ;; headers of the "rapidjson" input instead.
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rapidjson (assoc-ref inputs "rapidjson")))
               (substitute* "src/CMakeLists.txt"
                 (("../deps/rapidjson-1.1.0")
                  (string-append rapidjson "/include/rapidjson")))
               #t)))
         (add-before 'configure 'patch-python-binding-installation
           ;; Hard-code the library and data locations in the Python module
           ;; so that they refer to this package's output.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (substitute* "python/opencc/__init__.py"
                 (("(_libopenccfile =).*$" _ prefix)
                  (format #f "~a os.path.join('~a/lib', _libopenccfilename)~%"
                          prefix out))
                 (("(_opencc_share_dir =).*$" _ prefix)
                  (format #f "~a '~a/share/opencc'~%" prefix out))))
             #t))
         (add-after 'install 'install-python-binding
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dist (string-append
                           out "/lib/python"
                           ,(version-major+minor (package-version python))
                           "/site-packages")))
               (chdir "python")
               (mkdir-p dist)
               ;; Prepend the target site-packages directory.  PYTHONPATH may
               ;; be unset, in which case `getenv' returns #f and must not be
               ;; passed to `string-append'.
               (setenv "PYTHONPATH"
                       (let ((path (getenv "PYTHONPATH")))
                         (if path
                             (string-append dist ":" path)
                             dist)))
               (invoke "python" "setup.py" "install"
                       "--root=/" "--single-version-externally-managed"
                       (string-append "--prefix=" out))
               #t)))
         (add-before 'install-python-binding 'enable-bytecode-determinism
           (assoc-ref python:%standard-phases 'enable-bytecode-determinism)))))
    (native-inputs
     `(("python" ,python-wrapper)
       ("rapidjson" ,rapidjson)
       ("python-setuptools" ,python-setuptools)
       ("python-wheel" ,python-wheel)))
    (home-page "https://github.com/BYVoid/OpenCC")
    (synopsis "Convert between Traditional Chinese and Simplified Chinese")
    (description "Open Chinese Convert (OpenCC) converts between Traditional
Chinese and Simplified Chinese, supporting character-level conversion,
phrase-level conversion, variant conversion, and regional idioms among
Mainland China, Taiwan, and Hong-Kong.")
    (license license:asl2.0)))
933ac939 YA |
1135 | |
;; nkf: Network Kanji Filter, a kanji code converter.  Built with plain make
;; from a pinned commit of the GitHub repository.
(define-public nkf
  (let ((commit "08043eadf4abdddcf277842217e3c77a24740dc2"))
    (package
      (name "nkf")
      ;; The commits corresponding to specific versions are published
      ;; here:
      ;; https://ja.osdn.net/projects/nkf/scm/git/nkf/
      (version "2.1.5")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/nurse/nkf")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0anw0knr1iy4p9w3d3b3pbwzh1c43p1i2q4c28kw9zviw8kx2rly"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; test for perl module
         ;; Ask Guix for the compiler name instead of hard-coding "gcc", so
         ;; that cross-compilation picks up the target's compiler.
         #:make-flags (list ,(string-append "CC=" (cc-for-target))
                            "CFLAGS=-O2 -Wall -pedantic"
                            (string-append "prefix=" %output)
                            "MKDIR=mkdir -p")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))       ; No ./configure script
      (home-page "https://ja.osdn.net/projects/nkf/")
      (synopsis "Network Kanji Filter")
      (description "Nkf is yet another kanji code converter among networks,
hosts and terminals. It converts input kanji code to designated kanji code
such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 or UTF-32.")
      (license license:zlib))))
74247b80 NG |
1169 | |
;; python-pandocfilters: Python module for writing Pandoc filters, fetched
;; from PyPI.
(define-public python-pandocfilters
  (package
    (name "python-pandocfilters")
    (version "1.4.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pandocfilters" version))
              (sha256
               (base32
                "1sq675dg4barb5949xxz4d5gk2ly524hi1p1xgwb3d1l0nsznqxw"))))
    (build-system python-build-system)
    (home-page "https://github.com/jgm/pandocfilters")
    (synopsis "Python module for writing Pandoc filters")
    (description
     "Pandoc is a powerful utility to transform various
input formats into a wide range of output formats. To alter the
exported output document, Pandoc allows the usage of filters, which
are pipes that read a JSON serialization of the Pandoc AST from stdin,
transform it in some way, and write it to stdout. It allows therefore
to alter the processing of Pandoc's supported input formats, for
instance one can add new syntax elements to markdown, etc.

This package provides Python bindings.")
    (license license:bsd-3)))
24719e8a WŻ |
1193 | |
;; aha (Ansi Html Adapter): converts terminal escape sequences to HTML.
;; Built with plain make from the upstream Git tag named after the version.
(define-public aha
  (package
    (name "aha")
    (version "0.5.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/theZiz/aha")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1gywad0rvvz3c5balz8cxsnx0562hj2ngzqyr8zsy2mb4pn0lpgv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no test suite
       #:make-flags
       (list (string-append "CC=" ,(cc-for-target))
             (string-append "PREFIX=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/theZiz/aha")
    (synopsis "Converts terminal escape sequences to HTML")
    (description
     "@command{aha} (Ansi Html Adapter) converts ANSI escape sequences
of a Unix terminal to HTML code.")
    (license
     (list license:lgpl2.0+ license:mpl1.1))))
28e20c53 GA |
1221 | |
;; vale: syntax-aware prose linter written in Go, built from the upstream Git
;; tag "v<version>".  Only the compiled program is installed
;; (#:install-source? is #f).
(define-public go-github-com-errata-ai-vale
  (package
    (name "go-github-com-errata-ai-vale")
    (version "2.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/errata-ai/vale")
             (commit (string-append "v" version))))
       (sha256
        (base32 "0d07fwha2220m8j24h527xl0gnl3svvyaywflgk5292d6g49ach2"))
       (file-name (git-file-name name version))))
    (build-system go-build-system)
    (native-inputs
     ;; Each label is the Go import path of the corresponding package.
     `(("github.com/spf13/afero" ,go-github-com-spf13-afero)
       ("github.com/urfave/cli" ,go-github-com-urfave-cli)
       ("github.com/olekukonko/tablewriter" ,go-github-com-olekukonko-tablewriter)
       ("github.com/mitchellh/mapstructure" ,go-github-com-mitchellh-mapstructure)))
    (arguments
     `(#:import-path "github.com/errata-ai/vale"
       #:install-source? #f))
    (home-page "https://github.com/errata-ai/vale")
    (synopsis "Fully customizable syntax-aware linter that focuses on your style")
    (description
     "Vale is a fully extensible linter that focuses on your own writing style
by making use of rules in individual YAML files. It is syntax-aware on markup
languages such as HTML, Markdown, Asciidoc, and reStructuredText. The community
around it also has a list of style guides implemented with Vale in
@url{https://github.com/errata-ai/styles, their styles repo}.")
    (license license:expat)))