gnu: emacs-svg-icon: Fix grammar.
[jackhill/guix/guix.git] / gnu / packages / ocr.scm
CommitLineData
f99f6fd6
LC
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2013 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2016, 2020 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2019 Tobias Geerinckx-Rice <me@tobias.gr>
;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
;;; Copyright © 2021 Andy Tai <atai@atai.org>
;;; Copyright © 2021 Nicolas Goaziou <mail@nicolasgoaziou.fr>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
23
;;; Module declaration for the OCR-related packages defined in this file.
;;; (guix licenses) is imported with the `license:' prefix, so licenses are
;;; referenced as e.g. `license:gpl3+' in the package definitions.
(define-module (gnu packages ocr)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix packages)
  #:use-module (guix download)
  #:use-module (guix git-download)
  #:use-module (guix build-system cmake)
  #:use-module (guix build-system gnu)
  #:use-module (guix build-system python)
  #:use-module (gnu packages)
  #:use-module (gnu packages autotools)
  #:use-module (gnu packages backup)
  #:use-module (gnu packages check)
  #:use-module (gnu packages compression)
  #:use-module (gnu packages curl)
  #:use-module (gnu packages djvu)
  #:use-module (gnu packages docbook)
  #:use-module (gnu packages documentation)
  #:use-module (gnu packages enchant)
  #:use-module (gnu packages gettext)
  #:use-module (gnu packages glib)
  #:use-module (gnu packages gtk)
  #:use-module (gnu packages icu4c)
  #:use-module (gnu packages pdf)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages python)
  #:use-module (gnu packages qt)
  #:use-module (gnu packages scanner)
  #:use-module (gnu packages xml)
  #:use-module (gnu packages image))
f99f6fd6
LC
53
;; GNU Ocrad, built with the GNU build system from the release tarball
;; published on the GNU mirrors.
(define-public ocrad
  (package
    (name "ocrad")
    (version "0.27")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://gnu/ocrad/ocrad-"
                                  version ".tar.lz"))
              (sha256
               (base32
                "0divffvcaim89g4pvqs8kslbcxi475bcl3b4ynphf284k9zfdgx9"))))
    (build-system gnu-build-system)
    ;; The source is a ".tar.lz" archive; lzip is presumably needed at build
    ;; time to unpack it.
    (native-inputs `(("lzip" ,lzip)))
    (home-page "https://www.gnu.org/software/ocrad/")
    (synopsis "Optical character recognition based on feature extraction")
    (description
     "GNU Ocrad is an optical character recognition program based on a
feature extraction method. It can read images in PBM, PGM or PPM formats and
it produces text in 8-bit or UTF-8 formats.")
    (license license:gpl3+)))
74
;; Tesseract OCR engine, built from a pinned upstream Git commit rather than
;; a release tarball.
(define-public tesseract-ocr
  ;; There are useful commits beyond the last official stable release.
  (let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df")
        (revision "1"))
    (package
      (name "tesseract-ocr")
      ;; Version string derived from the base release, REVISION and COMMIT.
      (version (git-version "4.1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tesseract-ocr/tesseract")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
      (build-system gnu-build-system)
      (inputs
       `(("cairo" ,cairo)
         ("icu" ,icu4c)
         ("leptonica" ,leptonica)
         ("pango" ,pango)
         ("python-wrapper" ,python-wrapper)))
      (native-inputs
       `(("asciidoc" ,asciidoc)
         ("autoconf" ,autoconf)
         ("automake" ,automake)
         ("docbook-xsl" ,docbook-xsl)
         ("libarchive" ,libarchive)
         ("libcurl" ,curl)
         ("libtool" ,libtool)
         ("libtiff" ,libtiff)
         ("pkg-config" ,pkg-config)
         ("xsltproc" ,libxslt)))
      (arguments
       `(#:configure-flags
         ;; Tell 'configure' where the Leptonica headers are installed.
         (let ((leptonica (assoc-ref %build-inputs "leptonica")))
           (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))
         #:tests? #f                    ; Tests currently result in a segfault
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-docbook
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Don't attempt to download XSL schema.  Point the man page
               ;; build at the stylesheet shipped by the docbook-xsl input
               ;; instead of the upstream URL.
               (substitute* "doc/Makefile.am"
                 (("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
                  (string-append (assoc-ref inputs "docbook-xsl")
                                 "/xml/xsl/docbook-xsl-"
                                 ,(package-version docbook-xsl)
                                 "/manpages/docbook.xsl")))))
           ;; The training tools are built and installed through their own
           ;; make targets, run after the regular 'install' phase.
           (add-after 'install 'build-training
             (lambda _
               (invoke "make" "training")))
           (add-after 'build-training 'install-training
             (lambda _
               (invoke "make" "training-install"))))))
      (home-page "https://github.com/tesseract-ocr/tesseract")
      (synopsis "Optical character recognition engine")
      (description
       "Tesseract is an optical character recognition (OCR) engine with very
high accuracy. It supports many languages, output text formatting, hOCR
positional information and page layout analysis. Several image formats are
supported through the Leptonica library. It can also detect whether text is
monospaced or proportional.")
      (license license:asl2.0))))
9a94ccec 141
fee4afd8
NG
;; gImageReader, a Qt front-end to Tesseract, built with CMake from the
;; upstream release tarball.
(define-public gimagereader
  (package
    (name "gimagereader")
    (version "3.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/manisandro/gImageReader/releases"
             "/download/v" version "/"
             "gimagereader-" version ".tar.xz"))
       (sha256
        (base32 "1pghffb55k3wq33nbn9fi0lmjbldpmvqs2msnvss8bxz1k1ck23n"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no test
       #:configure-flags (list "-DENABLE_VERSIONCHECK=0")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-build
           ;; XXX: Prevent compilation error: "incomplete type ‘QUrl’ used in
           ;; nested name specifier".  Fixed upstream as
           ;; 6209e25dab20b233e399ff36fabe4252db0f9e44.  It can be removed in
           ;; release 3.3.2+.
           (lambda _
             ;; Insert the missing <QUrl> include right after the existing
             ;; <QMessageBox> include in both exporter sources.
             (with-directory-excursion "qt/src/hocr"
               (substitute* '("HOCROdtExporter.cc" "HOCRTextExporter.cc")
                 (("#include <QMessageBox>\n" all)
                  (string-append all "#include <QUrl>\n"))))
             #t)))))
    (native-inputs
     `(("gettext" ,gettext-minimal)
       ("intltool" ,intltool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("enchant" ,enchant)
       ("djvulibre" ,djvulibre)
       ("leptonica" ,leptonica)
       ("podofo" ,podofo)
       ("poppler-qt5" ,poppler-qt5)
       ("sane-backends" ,sane-backends)
       ("qtbase" ,qtbase)
       ("qtspell" ,qtspell)
       ("quazip" ,quazip)
       ("tesseract" ,tesseract-ocr)))
    (home-page "https://github.com/manisandro/gImageReader")
    (synopsis "Qt front-end to tesseract-ocr")
    (description
     "gImageReader is a Qt front-end to Tesseract optical character
recognition (OCR) software.

gImageReader supports automatic page layout detection but the user can
also manually define and adjust the recognition regions. It is
possible to import images from disk, scanning devices, clipboard and
screenshots. gImageReader also supports multipage PDF documents.
Recognized text is displayed directly next to the image and basic text
editing including search/replace and removing of line breaks is
possible. Spellchecking for the output text is also supported if the
corresponding dictionaries are installed.")
    (license license:gpl3+)))
202
9a94ccec
AV
;; Zinnia, built from a pinned Git commit.  Only the "zinnia" subdirectory of
;; the repository is kept; everything else is deleted by the origin snippet.
(define-public zinnia
  (let* ((commit "581faa8f6f15e4a7b21964be3a5ec36265c80e5b")
         (revision "1")
         ;; version copied from 'configure.in'
         (version (git-version "0.07" revision commit)))
    (package
      (name "zinnia")
      (version version)
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/taku910/zinnia")
               (commit commit)))
         (sha256
          (base32
           "1izjy5qw6swg0rs2ym2i72zndb90mwrfbd1iv8xbpwckbm4899lg"))
         (file-name (git-file-name name version))
         (modules '((guix build utils)
                    (ice-9 ftw)
                    (srfi srfi-26)))
         (snippet ; remove unnecessary files with potentially different license
          '(begin
             ;; Delete every top-level entry except "zinnia" (and the "."
             ;; and ".." entries returned by 'scandir').
             (for-each delete-file-recursively
                       (scandir "."
                                (negate (cut member <> '("zinnia"
                                                         "." "..")))))
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; The standard 'bootstrap' phase is replaced: enter the "zinnia"
           ;; subdirectory, where the remaining phases then run, and make
           ;; the two listed files writable (presumably because later
           ;; phases rewrite them -- confirm).
           (replace 'bootstrap
             (lambda _
               (chdir "zinnia")
               (for-each make-file-writable
                         '("config.log" "config.status"))
               #t)))))
      (home-page "https://taku910.github.io/zinnia/")
      (synopsis "Online hand recognition system with machine learning")
      (description
       "Zinnia is a simple, customizable and portable online hand recognition
system based on Support Vector Machines. Zinnia simply receives user pen
strokes as a sequence of coordinate data and outputs n-best characters sorted
by SVM confidence. To keep portability, Zinnia doesn't have any rendering
functionality. In addition to recognition, Zinnia provides training module
that allows us to create any hand-written recognition systems with low-cost.")
      (license (list license:bsd-3      ; all files except...
                     (license:non-copyleft ; some autotools related files
                      "file://zinnia/aclocal.m4")
                     license:x11        ; 'install-sh'
                     license:public-domain))))) ; 'install-sh'
16dd1fd0
AV
255
;;; python 2 bindings, license under the same terms as zinnia
;;; All fields not overridden below (source, version, home page, license,
;;; ...) are inherited from the 'zinnia' package.
(define-public python2-zinnia
  (package
    (inherit zinnia)
    (name "python2-zinnia")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; CObject API is used, it was removed in Python 3.2
       #:tests? #f ; avoid circular dependency on tegaki-zinnia-japanese
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "zinnia/python" subdirectory of the checkout.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "zinnia/python")
             #t)))))
    (inputs
     `(("zinnia" ,zinnia)))))
822cd628 273