gnu: libdvdcss: Update to 1.4.3.
[jackhill/guix/guix.git] / gnu / packages / textutils.scm
CommitLineData
7939e70a
TUBK
1;;; GNU Guix --- Functional package management for GNU
2;;; Copyright © 2015 Taylan Ulrich Bayırlı/Kammer <taylanbayirli@gmail.com>
73092df4 3;;; Copyright © 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
a64a8c46 4;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
e522d840 5;;; Copyright © 2015 Roel Janssen <roel@gnu.org>
2d8cf0b3 6;;; Copyright © 2016 Jelle Licht <jlicht@fsfe.org>
f17a5447 7;;; Copyright © 2016 Alex Griffin <a@ajgrf.com>
3d0c0ae6 8;;; Copyright © 2016, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
3c986a7d 9;;; Copyright © 2016 Nikita <nikita@n0.is>
07d6003b 10;;; Copyright © 2016, 2020 Marius Bakke <marius@gnu.org>
5353cea0 11;;; Copyright © 2017 Eric Bavier <bavier@member.fsf.org>
7fdca77e 12;;; Copyright © 2017 Rene Saavedra <rennes@openmailbox.org>
7577ab55 13;;; Copyright © 2017,2019 Hartmut Goebel <h.goebel@crazy-compilers.com>
3c8ba11a 14;;; Copyright © 2017 Kei Kebreau <kkebreau@posteo.net>
bfcdf887 15;;; Copyright © 2017 Alex Vong <alexvong1995@gmail.com>
9a71213e 16;;; Copyright © 2018, 2019, 2020 Tobias Geerinckx-Rice <me@tobias.gr>
4715f92e 17;;; Copyright © 2018 Pierre Neidhardt <mail@ambrevar.xyz>
1a0363cf 18;;; Copyright © 2018 Meiyo Peng <meiyo.peng@gmail.com>
933ac939 19;;; Copyright © 2019 Yoshinori Arai <kumagusu08@gmail.com>
f9488b70 20;;; Copyright © 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
24719e8a 21;;; Copyright © 2019 Wiktor Żelazny <wzelazny@vurv.cz>
7f05493d 22;;; Copyright © 2020 Maxim Cournoyer <maxim.cournoyer@gmail.com>
7939e70a
TUBK
23;;;
24;;; This file is part of GNU Guix.
25;;;
26;;; GNU Guix is free software; you can redistribute it and/or modify it
27;;; under the terms of the GNU General Public License as published by
28;;; the Free Software Foundation; either version 3 of the License, or (at
29;;; your option) any later version.
30;;;
31;;; GNU Guix is distributed in the hope that it will be useful, but
32;;; WITHOUT ANY WARRANTY; without even the implied warranty of
33;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34;;; GNU General Public License for more details.
35;;;
36;;; You should have received a copy of the GNU General Public License
37;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
38
39(define-module (gnu packages textutils)
40 #:use-module ((guix licenses) #:prefix license:)
41 #:use-module (guix packages)
42 #:use-module (guix download)
43 #:use-module (guix git-download)
ff3f6766 44 #:use-module (guix build-system ant)
7939e70a 45 #:use-module (guix build-system gnu)
03f801aa 46 #:use-module (guix build-system go)
c8d969b5 47 #:use-module (guix build-system cmake)
03639d03 48 #:use-module (guix build-system python)
3a176be0 49 #:use-module (guix utils)
9116f126 50 #:use-module (gnu packages)
f571e1c3 51 #:use-module (gnu packages autotools)
a6baae74 52 #:use-module (gnu packages base)
148585c2 53 #:use-module (gnu packages compression)
f9488b70 54 #:use-module (gnu packages gcc)
28e20c53 55 #:use-module (gnu packages golang)
2f6e988d 56 #:use-module (gnu packages gettext)
ff3f6766 57 #:use-module (gnu packages java)
8888fe82 58 #:use-module (gnu packages ncurses)
7f05493d 59 #:use-module (gnu packages pcre)
8888fe82 60 #:use-module (gnu packages perl)
61 #:use-module (gnu packages pkg-config)
f17a5447 62 #:use-module (gnu packages python)
2ee7524c 63 #:use-module (gnu packages python-build)
44d10b1f 64 #:use-module (gnu packages python-xyz)
8888fe82 65 #:use-module (gnu packages readline)
1506d491
EF
66 #:use-module (gnu packages slang)
67 #:use-module (gnu packages web))
7939e70a 68
2f6e988d
KK
(define-public dos2unix
  ;; Line-ending converter.  The build is driven purely by make flags because
  ;; the source tree has no configure script.
  (package
    (name "dos2unix")
    (version "7.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://waterlan.home.xs4all.nl/dos2unix/"
                           "dos2unix-" version ".tar.gz"))
       (sha256
        (base32 "00dfsf4rfyjb5j12gan8xjiirm0asshdz6dmd3l34a7ays6wadb0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("gettext" ,gettext-minimal)
       ("perl" ,perl)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Nothing to configure: there is no configure script.
         (delete 'configure))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         ;; Pick the compiler matching the build target and install
         ;; directly into the package output.
         (list (string-append "CC=" ,(cc-for-target))
               (string-append "prefix=" out)))))
    (home-page "https://waterlan.home.xs4all.nl/dos2unix.html")
    (synopsis "DOS/Mac to Unix and vice versa text file format converter")
    (description
     "dos2unix is a tool to convert line breaks in a text file from Unix format
to DOS format and vice versa.")
    (license license:bsd-2)))
97
7939e70a
TUBK
(define-public recode
  ;; Character set conversion library plus its command-line front-end.
  (package
    (name "recode")
    (version "3.7.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/rrthomas/recode/releases/"
                           "download/v" version "/recode-" version ".tar.gz"))
       (sha256
        (base32 "0m59sd1ca0zw1aydpc3m8sw03nc885knmccqryg7byzmqs585ia6"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python)
       ("python-cython" ,python-cython)))
    (home-page "https://github.com/rrthomas/recode")
    (synopsis "Text encoding converter")
    ;; Grammar fix: "transliteration are" -> "transliteration is".
    (description "The Recode library converts files between character sets and
usages. It recognises or produces over 200 different character sets (or about
300 if combined with an iconv library) and transliterates files between almost
any pair. When exact transliteration is not possible, it gets rid of
offending characters or falls back on approximations. The recode program is a
handy front-end to the library.")
    (license license:gpl3+)))
688fe865
TUBK
122
(define-public enca
  ;; Charset detection library (libenca) and its command-line front-end.
  (package
    (name "enca")
    (version "1.19")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nijel/enca")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "19q7cwwxmmk5j9438bsqdpjvdjawsd3zmw1zyqgi7s4m0rasr3ah"))))
    (build-system gnu-build-system)
    ;; The recode input stays disabled: the enca-1.19 test suite fails when
    ;; built against recent recode.
    ;; (inputs `(("recode" ,recode)))
    (synopsis "Text encoding detection tool")
    (description "Enca (Extremely Naive Charset Analyser) consists of libenca,
an encoding detection library, and enca, a command line frontend, integrating
libenca and several charset conversion libraries and tools.")
    (home-page "https://github.com/nijel/enca")
    (license license:gpl2)))
cd15ad82
RW
145
(define-public utf8proc
  ;; C library for Unicode normalization and case-folding of UTF-8 data.
  ;; The test suite needs Unicode data files that upstream would otherwise
  ;; download with curl; they are supplied as native inputs and copied into
  ;; place before the 'check phase.
  (package
    (name "utf8proc")
    (version "2.5.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/JuliaStrings/utf8proc")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1xlkazhdnja4lksn5c9nf4bln5gjqa35a8gwlam5r0728w0h83qq"))))
    (build-system gnu-build-system)
    (native-inputs
     (let ((UNICODE_VERSION "13.0.0"))  ; defined in data/Makefile
       ;; Test data that is otherwise downloaded with curl.
       `(("NormalizationTest.txt"
          ,(origin
             (method url-fetch)
             (uri (string-append "https://www.unicode.org/Public/"
                                 UNICODE_VERSION "/ucd/NormalizationTest.txt"))
             (sha256
              (base32 "07g0ya4f6zfzvpp24ccxkb2yq568kh83gls85rjl950nv5fya3nn"))))
         ("GraphemeBreakTest.txt"
          ,(origin
             (method url-fetch)
             (uri (string-append "https://www.unicode.org/Public/"
                                 UNICODE_VERSION
                                 "/ucd/auxiliary/GraphemeBreakTest.txt"))
             (sha256
              (base32 "07f8rrvcsq4pibdz6zxggxy8w7zjjqyw2ggclqlhalyv45yv7prj"))))

         ;; For tests.
         ("perl" ,perl))))
    (arguments
     ;; Use the target's compiler rather than a hard-coded "gcc", as the
     ;; dos2unix package in this file does.
     `(#:make-flags (list (string-append "CC=" ,(cc-for-target))
                          (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; no configure script
         (add-before 'check 'check-data
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Put the pre-fetched Unicode test files where the test
             ;; suite looks for them.
             (for-each (lambda (i)
                         (copy-file (assoc-ref inputs i)
                                    (string-append "data/" i)))
                       '("NormalizationTest.txt" "GraphemeBreakTest.txt"))
             ;; Replace the non-ASCII break markers with ASCII ones.
             (substitute* "data/GraphemeBreakTest.txt"
               (("÷") "/")
               (("×") "+"))
             #t)))))
    (home-page "https://juliastrings.github.io/utf8proc/")
    (synopsis "C library for processing UTF-8 Unicode data")
    ;; The Unicode version named here matches UNICODE_VERSION above.
    (description "utf8proc is a small C library that provides Unicode
normalization, case-folding, and other operations for data in the UTF-8
encoding, supporting Unicode version 13.0.0.")
    (license license:expat)))
f571e1c3 203
205df739
TGR
(define-public libconfuse
  ;; Configuration file parser library; only the shared library is built.
  (package
    (name "libconfuse")
    (version "3.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/martinh/libconfuse/"
                           "releases/download/v" version
                           "/confuse-" version ".tar.xz"))
       (sha256
        (base32 "043hqqykpprgrkw9s2hbdlxr308a7yxwsgxj4m8aadg1401hmm8x"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags (list "--disable-static")))
    (synopsis "Configuration file parser library")
    (description "libconfuse is a configuration file parser library. It
supports sections and (lists of) values (strings, integers, floats, booleans
or other sections), as well as some other features (such as
single/double-quoted strings, environment variable expansion, functions and
nested include statements).")
    (home-page "https://github.com/martinh/libconfuse")
    (license license:isc)))
227
f571e1c3
RW
(define-public libgtextutils
  ;; Text utilities library used by the fastx toolkit.  It is built with
  ;; GCC 5, so the default compiler's C++ headers are removed from the
  ;; include search path.
  (package
    (name "libgtextutils")
    (version "0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/libgtextutils/releases/download/"
             version "/libgtextutils-" version ".tar.gz"))
       (sha256
        (base32 "0jiybkb2z58wa2msvllnphr4js2hvjvh988pavb3mzkgr6ihwbkr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("gcc@5" ,gcc-5)                 ; doesn't build with later versions
       ("libtool" ,libtool)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'bootstrap
           ;; Regenerate the build system with the bundled "reconf" script.
           (lambda _
             (invoke "sh" "reconf")))
         (add-after 'set-paths 'hide-default-gcc
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Remove the default GCC from CPLUS_INCLUDE_PATH to prevent
             ;; conflicts with the GCC 5 input.
             (let* ((default-gcc (assoc-ref inputs "gcc"))
                    (unwanted    (string-append default-gcc "/include/c++"))
                    (entries     (string-split (getenv "CPLUS_INCLUDE_PATH")
                                               #\:)))
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-join (delete unwanted entries) ":"))
               #t))))))
    (home-page "https://github.com/agordon/libgtextutils")
    (synopsis "Gordon's text utils library")
    (description
     "libgtextutils is a text utilities library used by the fastx toolkit from
the Hannon Lab.")
    (license license:agpl3+)))
aae2b445
BW
268
(define-public cityhash
  ;; String hash functions, packaged from a git snapshot.
  (let ((commit "8af9b8c")
        (revision "2"))
    (package
      (name "cityhash")
      ;; Evaluates to "1.1-2.8af9b8c", the same string as before.
      (version (git-version "1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/google/cityhash")
                      (commit commit)))
                ;; A git checkout is a directory, not a tarball, so name it
                ;; with git-file-name like the other git-fetch origins in
                ;; this file rather than with a ".tar.gz" suffix.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0n6skf5dv8yfl1ckax8dqhvsbslkwc9158zf2ims0xqdvzsahbi6"))))
      (build-system gnu-build-system)
      (arguments
       '(#:make-flags (list "CXXFLAGS=-g -O3")
         #:phases
         (modify-phases %standard-phases
           ;; citycrc is not installed by default but is used by some
           ;; programs.
           (add-after 'install 'install-citycrc
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (include (string-append out "/include")))
                 (install-file "src/citycrc.h" include))
               #t)))))
      (home-page "https://github.com/google/cityhash")
      (synopsis "C++ hash functions for strings")
      (description
       "CityHash provides hash functions for strings. The functions mix the
input bits thoroughly but are not suitable for cryptography.")
      (license license:expat))))
e522d840 302
9128db21
RW
(define-public ustr
  ;; Low-overhead C string library.  It is built straight from its Makefile
  ;; (no configure script), producing the "all-shared" target.
  (package
    (name "ustr")
    (version "1.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.and.org/ustr/" version
                           "/ustr-" version ".tar.bz2"))
       (sha256
        (base32 "1i623ygdj7rkizj7985q9d6vj5amwg686aqb5j3ixpkqkyp6xbrx"))
       (patches (search-patches "ustr-fix-build-with-gcc-5.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure)
         (add-after 'unpack 'disable-check-for-stdint
           (lambda _
             ;; stdint.h is available, only not under /usr/include, so
             ;; neutralize the hard-coded file test.
             (substitute* (list "Makefile" "ustr-import.in")
               (("-f \"/usr/include/stdint.h\"") "-z \"\""))
             #t)))
       #:make-flags
       (list "CC=gcc"
             "HIDE="
             ;; "ldconfig" is not needed; substitute "echo" for
             ;; "/sbin/ldconfig".
             "LDCONFIG=echo"
             (string-append "prefix=" (assoc-ref %outputs "out"))
             "all-shared")))
    (home-page "http://www.and.org/ustr/")
    (synopsis "String library with very low memory overhead")
    (description
     "Ustr is a string library for C with very low memory overhead.")
    ;; Quoted from the home page: "The License for the code is MIT, new-BSD,
    ;; LGPL, etc. ... if you need another license to help compatibility, just
    ;; ask for it.  It's basically public domain, without all the legal
    ;; problems for everyone that trying to make something public domain
    ;; entails."
    (license license:public-domain)))
346
7577ab55
HG
(define-public ascii2binary
  ;; Provides the ascii2binary and binary2ascii converters.
  (package
    (name "ascii2binary")
    (version "2.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://billposer.org/Software/Downloads/"
                           "ascii2binary-" version ".tar.bz2"))
       (sha256
        (base32 "0dc9fxcdmppbs9s06jvq61zbk552laxps0xyk098gj41697ihd96"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("gettext" ,gettext-minimal)))
    (home-page "https://billposer.org/Software/a2b.html")
    (synopsis "Convert between ASCII, hexadecimal and binary representations")
    ;; Grammar fix: "written out only decimal" -> "written out only in
    ;; decimal".
    (description "The two programs are useful for generating test data, for
inspecting binary files, and for interfacing programs that generate textual
output to programs that require binary input and conversely. They can also be
useful when it is desired to reformat numbers.

@itemize

@item @command{ascii2binary} reads input consisting of ascii or hexadecimal
 representation numbers separated by whitespace and produces as output
 the binary equivalents. The type and precision of the binary output
 is selected using command line flags.

@item @command{binary2ascii} reads input consisting of binary numbers
 and converts them to their ascii or hexadecimal representation.
 Command line flags specify the type and size of the binary numbers
 and provide control over the format of the output.
 Unsigned integers may be written out in binary, octal, decimal,
 or hexadecimal.

 Signed integers may be written out only in binary or decimal. Floating
 point numbers may be written out only in decimal, either in standard or
 scientific notation. (If you want to examine the binary representation
 of floating point numbers, just treat the input as a sequence of unsigned
 characters.)

@end itemize")
    (license license:gpl3)))
390
(define-public uniutils
  ;; Tools for inspecting Unicode text.  The "utf8lookup" script calls
  ;; external programs by bare name; the 'fix-paths phase rewrites those
  ;; calls to absolute file names.
  (package
    (name "uniutils")
    (version "2.27")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://billposer.org/Software/Downloads/"
                           "uniutils-" version ".tar.bz2"))
       (sha256
        (base32 "19w1510w87gx7n4qy3zsb0m467a4rn5scvh4ajajg7jh6x5xri08"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags '("--disable-dependency-tracking")
       #:phases
       (modify-phases %standard-phases
         (add-after 'build 'fix-paths
           (lambda* (#:key outputs inputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (a2b (assoc-ref inputs "ascii2binary"))
                   (iconv (assoc-ref inputs "libiconv")))
               ;; Only commands at the start of a line are rewritten.
               (substitute* "utf8lookup"
                 (("^ascii2binary ") (string-append a2b "/bin/ascii2binary "))
                 (("^uniname ") (string-append out "/bin/uniname "))
                 (("^iconv ") (string-append iconv "/bin/iconv ")))
               #t))))))
    (inputs
     `(("ascii2binary" ,ascii2binary)
       ("libiconv" ,libiconv)))
    (home-page "https://billposer.org/Software/unidesc.html")
    (synopsis "Find out what is in a Unicode file")
    ;; Grammar fix: "@command{unireverse} reverse" -> "reverses".
    (description "Useful tools when working with Unicode files when one
doesn't know the writing system, doesn't have the necessary font, needs to
inspect invisible characters, needs to find out whether characters have been
combined or in what order they occur, or needs statistics on which characters
occur.

@itemize

@item @command{uniname} defaults to printing the character offset of each
character, its byte offset, its hex code value, its encoding, the glyph
itself, and its name. It may also be used to validate UTF-8 input.

@item @command{unidesc} reports the character ranges to which different
portions of the text belong. It can also be used to identify Unicode encodings
(e.g. UTF-16be) flagged by magic numbers.

@item @command{unihist} generates a histogram of the characters in its input.

@item @command{ExplicateUTF8} is intended for debugging or for learning about
Unicode. It determines and explains the validity of a sequence of bytes as a
UTF8 encoding.

@item @command{utf8lookup} provides a handy way to look up Unicode characters
from the command line.

@item @command{unireverse} reverses each line of UTF-8 input
character-by-character.

@end itemize")
    (license license:gpl3)))
452
e522d840
RJ
(define-public libconfig
  ;; Library for reading and writing structured configuration files.
  (package
    (name "libconfig")
    (version "1.7.2")
    ;; HOME-PAGE is declared before SOURCE because the download URI below is
    ;; derived from it.
    (home-page "https://hyperrealm.github.io/libconfig/")
    (source (origin
              (method url-fetch)
              ;; HOME-PAGE already ends in "/", so the appended path must not
              ;; start with one; otherwise the URL contains "libconfig//dist".
              (uri (string-append home-page "dist/libconfig-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1ngs2qx3cx5cbwinc5mvadly0b5n7s86zsc68c404czzfff7lg3w"))))
    (build-system gnu-build-system)
    (synopsis "C/C++ configuration file library")
    (description
     "Libconfig is a simple library for manipulating structured configuration
files. This file format is more compact and more readable than XML. And
unlike XML, it is type-aware, so it is not necessary to do string parsing in
application code.")
    (license license:lgpl2.1+)))
c8d969b5
PP
473
(define-public pfff
  ;; File fingerprinting tool, built with CMake from the tagged release.
  (package
    (name "pfff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pfff/pfff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1nxkfm7zliq3rmr7yp871sppwfnz71iz364m2sgazny71pzykggc"))))
    (build-system cmake-build-system)
    (synopsis "Probabilistic fast file fingerprinting tool")
    (description
     "pfff is a tool for calculating a compact digital fingerprint of a file
by sampling randomly from the file instead of reading it in full.
Consequently, the computation has a flat performance characteristic,
correlated with data variation rather than file size. pfff can be as reliable
as existing hashing techniques, with provably negligible risk of collisions.")
    (home-page "https://biit.cs.ut.ee/pfff/")
    (license license:bsd-3)))
2d8cf0b3
JL
497
(define-public oniguruma
  ;; Regular expression library; only the shared library is built.
  (package
    (name "oniguruma")
    (version "6.9.5-rev1")
    (source
     (origin
       (method url-fetch)
       (uri (let ((release-tag
                   ;; The release directory uses "_" wherever the version
                   ;; string contains "-".
                   (string-map (lambda (ch)
                                 (if (char=? ch #\-) #\_ ch))
                               version)))
              (string-append "https://github.com/kkos/"
                             "oniguruma/releases/download/v"
                             release-tag
                             "/onig-" version ".tar.gz")))
       (sha256
        (base32 "17m92k1n6bvza6m35fpd5g36zwpwm3hfz3478iwj5bvj2sfq8g6k"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags (list "--disable-static")))
    (synopsis "Regular expression library")
    (description "Oniguruma is a regular expressions library. The special
characteristic of this library is that different character encoding for every
regular expression object can be specified.")
    (home-page "https://github.com/kkos/oniguruma")
    (license license:bsd-2)))
1b90e57e
RW
522
(define-public antiword
  ;; Microsoft Word document reader.  It is built from "Makefile.Linux" and
  ;; installed through that Makefile's "global_install" target.
  (package
    (name "antiword")
    (version "0.37")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.winfield.demon.nl/linux"
                           "/antiword-" version ".tar.gz"))
       (sha256
        (base32 "1b7mi1l20jhj09kyh0bq14qzz8vdhhyf35gzwsq43mn6rc7h0b4f"))
       (patches (search-patches "antiword-CVE-2014-8123.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no test suite
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "-f" "Makefile.Linux"
               (string-append "GLOBAL_INSTALL_DIR=" out "/bin")
               (string-append "GLOBAL_RESOURCES_DIR=" out "/share/antiword")))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Point the compiled-in resource directory at the package's
             ;; own data directory so the mapping files are found.
             (let ((resources (string-append (assoc-ref outputs "out")
                                             "/share/antiword")))
               (substitute* "antiword.h"
                 (("/usr/share/antiword") resources))
               #t)))
         (replace 'install
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "make" "global_install" make-flags))))))
    (home-page "http://www.winfield.demon.nl/")
    (synopsis "Microsoft Word document reader")
    (description "Antiword is an application for displaying Microsoft Word
documents. It can also convert the document to PostScript or XML. Only
documents made by MS Word version 2 and version 6 or later are supported. The
name comes from: \"The antidote against people who send Microsoft Word files
to everybody, because they believe that everybody runs Windows and therefore
runs Word\".")
    (license license:gpl2+)))
f17a5447 566
d0abaf89
HG
(define-public catdoc
  ;; MS-Word text extractor; also provides xls2csv and catppt.
  (package
    (name "catdoc")
    (version "0.95")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ftp.wagner.pp.ru/pub/catdoc/"
                                  "catdoc-" version ".tar.gz"))
              (patches (search-patches "catdoc-CVE-2017-11110.patch"))
              (sha256
               (base32
                "15h7v3bmwfk4z8r78xs5ih6vd0pskn0rj90xghvbzdjj0cc88jji"))))
    (build-system gnu-build-system)
    ;; TODO: Also build `wordview` which requires `tk` – make a separate
    ;; package for this.
    (arguments
     '(#:tests? #f                      ; There are no tests
       #:configure-flags '("--disable-wordview")
       #:phases
       (modify-phases %standard-phases
         (add-before 'install 'fix-install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Create the man page directory ahead of the 'install phase.
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/share/man/man1"))
               ;; Return #t explicitly, like every other phase in this
               ;; file; mkdir-p's return value is unspecified.
               #t))))))
    (home-page "https://www.wagner.pp.ru/~vitus/software/catdoc/")
    (synopsis "MS-Word to TeX or plain text converter")
    ;; Typo fix: "It's goal" -> "Its goal".
    (description "@command{catdoc} extracts text from MS-Word files, trying to
preserve as many special printable characters as possible. It supports
everything up to Word-97. Also supported are MS Write documents and RTF files.

@command{catdoc} does not preserve complex word formatting, but it can
translate some non-ASCII characters into TeX escape codes. Its goal is to
extract plain text and allow you to read it and, probably, reformat with TeX,
according to TeXnical rules.

This package also provides @command{xls2csv}, which extracts data from Excel
spreadsheets and outputs it in comma-separated-value format, and
@command{catppt}, which extracts data from PowerPoint presentations.")
    (license license:gpl2+)))
606
f17a5447
AG
(define-public utfcpp
  ;; C++ UTF-8 library.  The build has no install target, so the 'install
  ;; phase copies the "source" directory and the README by hand.
  (package
    (name "utfcpp")
    (version "2.3.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nemtrif/utfcpp")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1gr98d826z6wa58r1s5i7rz7q2x3r31v7zj0pjjlrc7gfxwklr4s"))))
    (build-system cmake-build-system)
    (arguments
     `(#:out-of-source? #f
       #:phases
       (modify-phases %standard-phases
         (replace 'install              ; upstream has no install target
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (copy-recursively "source"
                                 (string-append prefix "/include"))
               (install-file "README.md"
                             (string-append prefix "/share/doc/" ,name))
               #t))))))
    (synopsis "Portable C++ library for handling UTF-8")
    (description "UTF8-CPP is a C++ library for handling UTF-8 encoded text
in a portable way.")
    (home-page "https://github.com/nemtrif/utfcpp")
    (license license:boost1.0)))
8888fe82 638
(define-public dbacl
  ;; Bayesian text classifier.  Two sample documents and the Japanese test
  ;; are removed from the source tree; see the license comment at the end of
  ;; this definition for the reason.
  (package
    (name "dbacl")
    (version "1.14.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/dbacl/dbacl/" version "/"
                           "dbacl-" version ".tar.gz"))
       (sha256
        (base32 "1gas0112wqjvwn9qg3hxnawk7h3prr0w9b2h68f3p1ifd1kzn3gz"))
       (patches (search-patches "dbacl-include-locale.h.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list
        (string-append "-I" (assoc-ref %build-inputs "slang")
                       "/include/slang")
        (string-append "-I" (assoc-ref %build-inputs "ncurses")
                       "/include/ncurses"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'delete-sample6-and-japanese
           (lambda _
             ;; Drop the two excluded documents and every build-system
             ;; reference to them and to the Japanese test.
             (substitute* "doc/Makefile.am"
               (("sample6.txt") "")
               (("japanese.txt") ""))
             (delete-file "doc/sample6.txt")
             (delete-file "doc/japanese.txt")
             (substitute* (list "src/tests/Makefile.am"
                                "src/tests/Makefile.in")
               (("dbacl-jap.shin") "")
               (("dbacl-jap.sh") ""))
             #t))
         (add-after 'unpack 'delete-test
           ;; See comments about the license.
           (lambda _
             (delete-file "src/tests/dbacl-jap.shin")
             #t))
         (add-after 'unpack 'fix-test-files
           ;; The previously bound "out"/"bin" locals were never used and
           ;; have been removed, so no keyword arguments are needed here.
           (lambda _
             (substitute* (find-files "src/tests/" "\\.shin$")
               ;; Keep the test scripts from resetting PATH to FHS
               ;; directories.
               (("PATH=/bin:/usr/bin")
                "#PATH=/bin:/usr/bin")
               (("diff") (which "diff"))
               (("tr") (which "tr")))
             #t))
         (replace 'bootstrap
           ;; `invoke' returns #t on success and raises an error otherwise.
           (lambda _
             (invoke "autoreconf" "-vif"))))))
    (inputs
     `(("ncurses" ,ncurses)
       ("perl" ,perl)
       ("readline" ,readline)
       ("slang" ,slang)))
    (native-inputs
     `(("libtool" ,libtool)
       ("autoconf" ,autoconf)
       ("automake" ,automake)
       ("pkg-config" ,pkg-config)))
    (home-page "https://www.lbreyer.com/dbacl.html")
    (synopsis "Bayesian text and email classifier")
    (description
     "dbacl is a fast Bayesian text and email classifier. It builds a variety
of language models using maximum entropy (minimum divergence) principles, and
these can then be used to categorize input data automatically among multiple
categories.")
    ;; The software is licensed as GPLv3 or later, but
    ;; includes various sample texts in the doc dir:
    ;; - sample1.txt, sample3.txt and sample5.txt are in the public domain,
    ;;   by Mark Twain.
    ;; - sample2.txt, sample4.txt are in the public domain, by Aristotle.
    ;; - sample6.txt is a forwarded email, copyright unknown.
    ;;   Guix does exclude sample6.txt.
    ;; - japanese.txt is a Japanese unofficial translation of the
    ;;   GNU General Public License, (c) by the Free Software Foundation.
    ;;   Guix excludes this file.
    (license (list license:gpl3+ license:public-domain))))
0905048a
MB
720
(define-public dotconf
  ;; C library for configuration files, bootstrapped with the autotools
  ;; from a git checkout.
  (package
    (name "dotconf")
    (version "1.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/williamh/dotconf")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1sc95hw5k2xagpafny0v35filmcn05k1ds5ghkldfpf6xw4hakp7"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)))
    ;; FIXME: maketest.sh does not work, so the tests are disabled.
    (arguments
     `(#:tests? #f))
    (synopsis "Configuration file parser library")
    (description
     "C library for creating and parsing configuration files.")
    (home-page "https://github.com/williamh/dotconf")
    (license (list license:lgpl2.1      ; main distribution
                   license:asl1.1))))   ; src/readdir.{c,h}
ff3f6766 746
7f05493d
MC
(define-public drm-tools
  (package
    (name "drm-tools")
    (version "1.1.33")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/drmtools/drm_tools-"
                           version ".tar.gz"))
       (sha256
        (base32 "187zbxw21zcg8gpyc13gxlycfw0n05a6rmqq6im5wr9zk1v1wj80"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;the test suite fails
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the install locations hard-coded in CMakeLists.txt so
         ;; that they point into the package output, and move the manual
         ;; pages below share/.
         (add-after 'unpack 'set-install-prefixes
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((store-dir (assoc-ref outputs "out")))
               (substitute* "CMakeLists.txt"
                 ;; The first character of the output path is dropped before
                 ;; it replaces "tmp/testinstall".
                 (("tmp/testinstall") (string-drop store-dir 1))
                 (("/man/man1") "/share/man/man1"))
               #t)))
         ;; Strip "./" and absolute directory prefixes from the commands used
         ;; by the test scripts, so that they are looked up through PATH.
         (add-after 'unpack 'adjust-test-paths
           (lambda _
             (substitute* (list "test_extract_increment.sh"
                                "test_extract_features.sh"
                                "test_extract_features2.sh"
                                "test_dmath.sh")
               (("\\./extract") "extract")
               (("\\./dmath") "dmath")
               (("/usr/local/bin/") "")
               (("/bin/rm") "rm")
               (("/bin/cp") "cp"))
             #t))
         ;; The stock 'check phase is replaced by the one added after 'build
         ;; below.
         (delete 'check)
         ;; The produced binaries are written directly to %output/bin.
         (delete 'install)
         ;; Run the upstream test driver from the source directory, with the
         ;; freshly built binaries first in PATH.  Only runs when #:tests? is
         ;; true.
         (add-after 'build 'check
           (lambda* (#:key outputs tests? #:allow-other-keys)
             (when tests?
               (let ((bin-dir (string-append (assoc-ref outputs "out")
                                             "/bin")))
                 (setenv "PATH"
                         (string-append bin-dir ":" (getenv "PATH")))
                 (with-directory-excursion
                     (string-append "../drm_tools-" ,version)
                   (invoke "sh" "test_all.sh")))))))))
    (native-inputs
     `(("which" ,which)))               ;for tests
    (inputs
     `(("pcre" ,pcre)))
    (home-page "http://drmtools.sourceforge.net/")
    (synopsis "Utilities to manipulate text and binary files")
    (description "The drm_tools package contains the following commands:
@table @command
@item accudate
An extended version of the \"date\" program that has sub-second accuracy.
@item binformat
Format complex binary data into text.
@item binload
Load data into a binary file using simple commands from the input.
@item binorder
Sort, merge, search, retrieve or generate test data consisting of fixed size
binary records.
@item binreplace
Find or find/replace in binary files.
@item binsplit
Split test data consisting of fixed size binary records into one or more
output streams.
@item chardiff
Find changes between two files at the character level. Unlike \"diff\", it
lists just the characters that differ, so if the 40,000th character is
different only that one character will be shown, not the entire line.
@item columnadd
Add columns of integers, decimals, and/or times.
@item datasniffer
A utility for formatting binary data dumps.
@item dmath
Double precision interactive command line math calculator.
@item extract
Extract and emit data from text files based on character or token position.
@item execinput
A utility that reads from STDIN and executes each line as a command in a
sub-process.
@item indexed_text
A utility for rapid retrieval of text by line numbers, in any order, from a
text file.
@item mdump
Format binary data.
@item msgqueue
Create message queues and send/receive messages.
@item mbin
@itemx mbout
Multiple buffer in and out. Used for buffering a lot of data between a slow
device and a fast device. Mostly for buffering streaming tape drives for use
with slower network connections, so that streaming is maintained as much as
possible to minimize wear on the tape device.
@item pockmark
Corrupt data streams - useful for testing error correction and data recovery.
@item tarsieve
Filter, list, or split a tar file.
@end table")
    (license license:gpl2+)))
850
ff3f6766
RW
(define-public java-rsyntaxtextarea
  (package
    (name "java-rsyntaxtextarea")
    (version "2.6.1")
    (source
     (origin
       (method git-fetch)
       ;; Upstream tags are the bare version number.
       (uri (git-reference
             (url "https://github.com/bobbylight/RSyntaxTextArea")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0dyflzvxq2wvs0rgqfyi5yzzrb6r4bzw2dm8cl304dakxk38ddys"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "rsyntaxtextarea.jar"
       ;; FIXME: some tests fail because locale resources cannot be found.
       ;; Even when I add them to the class path,
       ;; RSyntaxTextAreaEditorKitDumbCompleteWordActionTest fails.
       #:tests? #f))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://bobbylight.github.io/RSyntaxTextArea/")
    (synopsis "Syntax highlighting text component for Java Swing")
    (description "RSyntaxTextArea is a syntax highlighting, code folding text
component for Java Swing. It extends @code{JTextComponent} so it integrates
completely with the standard @code{javax.swing.text} package. It is fast and
efficient, and can be used in any application that needs to edit or view
source code.")
    (license license:bsd-3)))
03639d03
RW
882
;; We use the sources from git instead of the tarball from pypi, because the
;; latter does not include the Cython source file from which bycython.cpp is
;; generated.
(define-public python-editdistance
  (let ((commit "3ea84a7dd3258c76aa3be851ef3d50e59c886846")
        (revision "1"))
    (package
      (name "python-editdistance")
      ;; `git-version' yields "0.3.1-1.3ea84a7", the same string that was
      ;; previously assembled by hand.
      (version (git-version "0.3.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/aflc/editdistance")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1l43svsv12crvzphrgi6x435z6xg8m086c64armp8wzb4l8ccm7g"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Discard the pre-generated C++ file and regenerate it from the
           ;; Cython source.
           (add-after 'unpack 'build-cython-code
             (lambda _
               (with-directory-excursion "editdistance"
                 (delete-file "bycython.cpp")
                 (invoke "cython" "--cplus" "bycython.pyx")))))))
      (native-inputs
       `(("python-cython" ,python-cython)))
      ;; Same host as the source URL above.
      (home-page "https://github.com/aflc/editdistance")
      (synopsis "Fast implementation of the edit distance (Levenshtein distance)")
      (description
       "This library simply implements Levenshtein distance algorithm with C++
and Cython.")
      (license license:expat))))
03f801aa
CB
919
(define-public go-github.com-mattn-go-runewidth
  (let ((commit "703b5e6b11ae25aeb2af9ebb5d5fdf8fa2575211")
        (version "0.0.4")
        (revision "1"))
    (package
      (name "go-github.com-mattn-go-runewidth")
      (version (git-version version revision commit))
      (source
       (origin
         (method git-fetch)
         ;; The repository is named "go-runewidth", in agreement with the
         ;; #:import-path below.
         (uri (git-reference
               (url "https://github.com/mattn/go-runewidth")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0znpyz71gajx3g0j2zp63nhjj2c07g16885vxv4ykwnrfmzbgk4w"))))
      (build-system go-build-system)
      (arguments
       '(#:import-path "github.com/mattn/go-runewidth"))
      (synopsis "@code{runewidth} provides Go functions to work with string widths")
      (description
       "The @code{runewidth} library provides Go functions for padding,
measuring and checking the width of strings, with support for East Asian
text.")
      (home-page "https://github.com/mattn/go-runewidth")
      (license license:expat))))
74fa77e9
PN
947
(define-public docx2txt
  (package
    (name "docx2txt")
    (version "1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/docx2txt/docx2txt/v"
             version "/docx2txt-" version ".tgz"))
       (sha256
        (base32 "06vdikjvpj6qdb41d8wzfnyj44jpnknmlgbhbr1w215420lpb5xj"))))
    (build-system gnu-build-system)
    (inputs
     `(("unzip" ,unzip)
       ("perl" ,perl)))
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags
       (list (string-append "BINDIR=" (assoc-ref %outputs "out") "/bin")
             (string-append "CONFIGDIR=" (assoc-ref %outputs "out") "/etc")
             ;; Makefile seems to be a bit dumb at guessing.
             "INSTALL=install"
             "PERL=perl")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Tidy up what 'make install' produced: drop the shell wrapper,
         ;; give the Perl script its final name, and point the configuration
         ;; file at the unzip input.
         (add-after 'install 'fix-install
           (lambda* (#:key outputs inputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (config (string-append out "/etc/docx2txt.config"))
                    (unzip (assoc-ref inputs "unzip")))
               ;; According to INSTALL, the .sh wrapper can be skipped.
               (delete-file (string-append bin "/docx2txt.sh"))
               (rename-file (string-append bin "/docx2txt.pl")
                            (string-append bin "/docx2txt"))
               (substitute* config
                 (("config_unzip => '/usr/bin/unzip',")
                  (string-append "config_unzip => '"
                                 unzip
                                 "/bin/unzip',")))
               ;; Makefile is wrong.
               (chmod config #o644)
               #t))))))
    (synopsis "Recover text from @file{.docx} files, with good formatting")
    (description
     "@command{docx2txt} is a Perl based command line utility to convert
Microsoft Office @file{.docx} documents to equivalent text documents. Latest
version supports following features during text extraction.

@itemize
@item Character conversions; currency characters are converted to respective
names like Euro.
@item Capitalisation of text blocks.
@item Center and right justification of text fitting in a line of
(configurable) 80 columns.
@item Horizontal ruler, line breaks, paragraphs separation, tabs.
@item Indicating hyperlinked text along with the hyperlink (configurable).
@item Handling (bullet, decimal, letter, roman) lists along with (attempt at)
indentation.
@end itemize")
    (home-page "http://docx2txt.sourceforge.net")
    (license license:gpl3+)))
1a0363cf 1013
bc5aa386
VC
(define-public odt2txt
  (package
    (name "odt2txt")
    (version "0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dstosberg/odt2txt/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0im3kzvhxkjlx57w6h13mc9584c74ma1dyymgvpq2y61av3gc35v"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no make check
       #:make-flags
       ;; Use the compiler for the target system rather than a hard-coded
       ;; "gcc", so that cross-compilation picks the right tool.
       (list (string-append "CC=" ,(cc-for-target))
             (string-append "DESTDIR=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/dstosberg/odt2txt/")
    (synopsis "Converter from OpenDocument Text to plain text")
    (description "odt2txt is a command-line tool which extracts the text out
of OpenDocument Texts, as produced by OpenOffice.org, KOffice, StarOffice and
others.

odt2txt can also extract text from some file formats similar to OpenDocument
Text, such as OpenOffice.org XML (*.sxw), which was used by OpenOffice.org
version 1.x and older StarOffice versions. To a lesser extent, odt2txt may be
useful to extract content from OpenDocument spreadsheets (*.ods) and
OpenDocument presentations (*.odp).")
    (license license:gpl2)))
1051
1a0363cf
MP
(define-public opencc
  (package
    (name "opencc")
    (version "1.1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BYVoid/OpenCC")
             (commit (string-append "ver." version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "07y0pvxjlmpcnym229j87qcfwnfm7vi10dad0a20xy6as55a9j3d"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; TODO: Unbundle tclap, darts-clone, gtest
           (delete-file-recursively "deps/rapidjson-1.1.0")
           #t))))
    (build-system cmake-build-system)
    (arguments
     ;; Building in the source tree is required to locate the install script
     ;; properly.
     `(#:out-of-source? #f
       #:parallel-build? #f             ;occasionally failed.
       ;; The Python build-side module is imported in addition to the CMake
       ;; ones so that one of its phases can be reused below.
       #:imported-modules
       (,@%cmake-build-system-modules
        (guix build python-build-system))
       #:modules ((guix build cmake-build-system)
                  ((guix build python-build-system) #:prefix python:)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         ;; Remove the custom 'build_py' command from setup.py.
         (add-after 'unpack 'prevent-rebuild-during-installation
           (lambda _
             (substitute* "python/setup.py"
               (("'build_py': BuildPyCommand,") ""))
             #t))
         ;; Point the build at the rapidjson input instead of the bundled
         ;; copy that the source snippet deleted.
         (add-after 'unpack 'patch-3rd-party-references
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rapidjson-dir (assoc-ref inputs "rapidjson")))
               (substitute* "src/CMakeLists.txt"
                 (("../deps/rapidjson-1.1.0")
                  (string-append rapidjson-dir "/include/rapidjson")))
               #t)))
         ;; Hard-code the library and data locations in the Python module so
         ;; that they refer to this package's output.
         (add-before 'configure 'patch-python-binding-installation
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((output (assoc-ref outputs "out")))
               (substitute* "python/opencc/__init__.py"
                 (("(_libopenccfile =).*$" _ lhs)
                  (format #f "~a os.path.join('~a/lib', _libopenccfilename)~%"
                          lhs output))
                 (("(_opencc_share_dir =).*$" _ lhs)
                  (format #f "~a '~a/share/opencc'~%" lhs output))))
             #t))
         ;; Install the Python binding into the site-packages directory of
         ;; the output.  Note that the working directory is left at "python".
         (add-after 'install 'install-python-binding
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (site-packages
                     (string-append
                      output "/lib/python"
                      ,(version-major+minor (package-version python))
                      "/site-packages")))
               (chdir "python")
               (mkdir-p site-packages)
               (setenv "PYTHONPATH"
                       (string-append site-packages ":"
                                      (getenv "PYTHONPATH")))
               (invoke "python" "setup.py" "install"
                       "--root=/" "--single-version-externally-managed"
                       (string-append "--prefix=" output))
               #t)))
         ;; Reuse the phase of the same name from the Python build system.
         (add-before 'install-python-binding 'enable-bytecode-determinism
           (assoc-ref python:%standard-phases
                      'enable-bytecode-determinism)))))
    (native-inputs
     `(("python" ,python-wrapper)
       ("rapidjson" ,rapidjson)
       ("python-setuptools" ,python-setuptools)
       ("python-wheel" ,python-wheel)))
    (home-page "https://github.com/BYVoid/OpenCC")
    (synopsis "Convert between Traditional Chinese and Simplified Chinese")
    (description "Open Chinese Convert (OpenCC) converts between Traditional
Chinese and Simplified Chinese, supporting character-level conversion,
phrase-level conversion, variant conversion, and regional idioms among
Mainland China, Taiwan, and Hong-Kong.")
    (license license:asl2.0)))
933ac939
YA
1135
(define-public nkf
  ;; The package is pinned to a specific commit but carries the plain release
  ;; number as its version.
  (let ((commit "08043eadf4abdddcf277842217e3c77a24740dc2"))
    (package
      (name "nkf")
      ;; The commits corresponding to specific versions are published
      ;; here:
      ;; https://ja.osdn.net/projects/nkf/scm/git/nkf/
      (version "2.1.5")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/nurse/nkf")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0anw0knr1iy4p9w3d3b3pbwzh1c43p1i2q4c28kw9zviw8kx2rly"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; test for perl module
         #:make-flags
         ;; Use the compiler for the target system rather than a hard-coded
         ;; "gcc", so that cross-compilation picks the right tool.
         (list (string-append "CC=" ,(cc-for-target))
               "CFLAGS=-O2 -Wall -pedantic"
               (string-append "prefix=" %output)
               "MKDIR=mkdir -p")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))       ; No ./configure script
      (home-page "https://ja.osdn.net/projects/nkf/")
      (synopsis "Network Kanji Filter")
      (description "Nkf is yet another kanji code converter among networks,
hosts and terminals. It converts input kanji code to designated kanji code
such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 or UTF-32.")
      (license license:zlib))))
74247b80
NG
1169
(define-public python-pandocfilters
  (package
    (name "python-pandocfilters")
    (version "1.4.3")
    ;; The release tarball is fetched from PyPI.
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pandocfilters" version))
       (sha256
        (base32
         "1sq675dg4barb5949xxz4d5gk2ly524hi1p1xgwb3d1l0nsznqxw"))))
    (build-system python-build-system)
    (home-page "https://github.com/jgm/pandocfilters")
    (synopsis "Python module for writing Pandoc filters")
    (description "Pandoc is a powerful utility to transform various
input formats into a wide range of output formats. To alter the
exported output document, Pandoc allows the usage of filters, which
are pipes that read a JSON serialization of the Pandoc AST from stdin,
transform it in some way, and write it to stdout. It allows therefore
to alter the processing of Pandoc's supported input formats, for
instance one can add new syntax elements to markdown, etc.

This package provides Python bindings.")
    (license license:bsd-3)))
24719e8a
1193
(define-public aha
  (package
    (name "aha")
    (version "0.5.1")
    (source
     (origin
       (method git-fetch)
       ;; Upstream tags are the bare version number.
       (uri (git-reference
             (url "https://github.com/theZiz/aha")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1gywad0rvvz3c5balz8cxsnx0562hj2ngzqyr8zsy2mb4pn0lpgv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no test suite
       ;; The compiler and the installation prefix are passed to make
       ;; directly; the 'configure phase is deleted.
       #:make-flags
       (list (string-append "CC=" ,(cc-for-target))
             (string-append "PREFIX=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/theZiz/aha")
    (synopsis "Converts terminal escape sequences to HTML")
    (description "@command{aha} (Ansi Html Adapter) converts ANSI escape sequences
of a Unix terminal to HTML code.")
    (license (list license:lgpl2.0+ license:mpl1.1))))
28e20c53
GA
1221
(define-public go-github-com-errata-ai-vale
  (package
    (name "go-github-com-errata-ai-vale")
    (version "2.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/errata-ai/vale")
             (commit (string-append "v" version))))
       (sha256
        (base32 "0d07fwha2220m8j24h527xl0gnl3svvyaywflgk5292d6g49ach2"))
       (file-name (git-file-name name version))))
    (build-system go-build-system)
    ;; Each label is the Go import path of the corresponding package.
    (native-inputs
     `(("github.com/spf13/afero" ,go-github-com-spf13-afero)
       ("github.com/urfave/cli" ,go-github-com-urfave-cli)
       ("github.com/olekukonko/tablewriter" ,go-github-com-olekukonko-tablewriter)
       ("github.com/mitchellh/mapstructure" ,go-github-com-mitchellh-mapstructure)))
    (arguments
     `(#:import-path "github.com/errata-ai/vale"
       ;; Do not install the Go source code into the output.
       #:install-source? #f))
    (home-page "https://github.com/errata-ai/vale")
    (synopsis "Fully customizable syntax-aware linter that focuses on your style")
    (description
     "Vale is a fully extensible linter that focuses on your own writing style
by making use of rules in individual YAML files. It is syntax-aware on markup
languages such as HTML, Markdown, Asciidoc, and reStructuredText. The community
around it also has a list of style guides implemented with Vale in
@url{https://github.com/errata-ai/styles, their styles repo}.")
    (license license:expat)))