Commit | Line | Data |
---|---|---|
f99f6fd6 LC |
1 | ;;; GNU Guix --- Functional package management for GNU |
2 | ;;; Copyright © 2013 Ludovic Courtès <ludo@gnu.org> | |
04950d1f | 3 | ;;; Copyright © 2016, 2020 Efraim Flashner <efraim@flashner.co.il> |
ff8a66bc | 4 | ;;; Copyright © 2019 Tobias Geerinckx-Rice <me@tobias.gr> |
9a94ccec | 5 | ;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com> |
822cd628 | 6 | ;;; Copyright © 2021 Andy Tai <atai@atai.org> |
fee4afd8 | 7 | ;;; Copyright © 2021 Nicolas Goaziou <mail@nicolasgoaziou.fr> |
f99f6fd6 LC |
8 | ;;; |
9 | ;;; This file is part of GNU Guix. | |
10 | ;;; | |
11 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
12 | ;;; under the terms of the GNU General Public License as published by | |
13 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
14 | ;;; your option) any later version. | |
15 | ;;; | |
16 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
17 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;;; GNU General Public License for more details. | |
20 | ;;; | |
21 | ;;; You should have received a copy of the GNU General Public License | |
22 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
23 | ||
9e617a54 | 24 | (define-module (gnu packages ocr) |
d814be32 | 25 | #:use-module ((guix licenses) #:prefix license:) |
f99f6fd6 LC |
26 | #:use-module (guix packages) |
27 | #:use-module (guix download) | |
9a94ccec | 28 | #:use-module (guix git-download) |
fee4afd8 | 29 | #:use-module (guix build-system cmake) |
f99f6fd6 | 30 | #:use-module (guix build-system gnu) |
16dd1fd0 | 31 | #:use-module (guix build-system python) |
822cd628 T |
32 | #:use-module (gnu packages) |
33 | #:use-module (gnu packages autotools) | |
34 | #:use-module (gnu packages backup) | |
35 | #:use-module (gnu packages check) | |
d814be32 | 36 | #:use-module (gnu packages compression) |
822cd628 | 37 | #:use-module (gnu packages curl) |
fee4afd8 | 38 | #:use-module (gnu packages djvu) |
822cd628 T |
39 | #:use-module (gnu packages docbook) |
40 | #:use-module (gnu packages documentation) | |
fee4afd8 NG |
41 | #:use-module (gnu packages enchant) |
42 | #:use-module (gnu packages gettext) | |
43 | #:use-module (gnu packages glib) | |
822cd628 T |
44 | #:use-module (gnu packages gtk) |
45 | #:use-module (gnu packages icu4c) | |
fee4afd8 | 46 | #:use-module (gnu packages pdf) |
822cd628 | 47 | #:use-module (gnu packages pkg-config) |
16dd1fd0 | 48 | #:use-module (gnu packages python) |
fee4afd8 NG |
49 | #:use-module (gnu packages qt) |
50 | #:use-module (gnu packages scanner) | |
822cd628 | 51 | #:use-module (gnu packages xml) |
427dda54 | 52 | #:use-module (gnu packages image)) |
f99f6fd6 LC |
53 | |
;; GNU Ocrad, built with the GNU build system from the release tarball
;; published on the GNU mirrors.
(define-public ocrad
  (package
    (name "ocrad")
    (version "0.27")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://gnu/ocrad/ocrad-" version ".tar.lz"))
       (sha256
        (base32 "0divffvcaim89g4pvqs8kslbcxi475bcl3b4ynphf284k9zfdgx9"))))
    (build-system gnu-build-system)
    ;; The source tarball is lzip-compressed ('.tar.lz'); lzip is the only
    ;; extra build-time input.
    (native-inputs
     `(("lzip" ,lzip)))
    (synopsis "Optical character recognition based on feature extraction")
    (description
     "GNU Ocrad is an optical character recognition program based on a
feature extraction method. It can read images in PBM, PGM or PPM formats and
it produces text in 8-bit or UTF-8 formats.")
    (home-page "https://www.gnu.org/software/ocrad/")
    (license license:gpl3+)))
74 | ||
;; Tesseract OCR engine, built from a pinned Git commit with the GNU build
;; system.
(define-public tesseract-ocr
  ;; There are useful commits beyond the last official stable release.
  (let ((revision "1")
        (commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df"))
    (package
      (name "tesseract-ocr")
      (version (git-version "4.1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tesseract-ocr/tesseract")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; Tests currently result in a segfault
         ;; Point 'configure' at the "include" directory of the leptonica
         ;; input.
         #:configure-flags
         (list (string-append "LIBLEPT_HEADERSDIR="
                              (assoc-ref %build-inputs "leptonica")
                              "/include"))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-docbook
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Don't attempt to download XSL schema: refer to the copy
               ;; shipped in the docbook-xsl input instead.
               (let ((local-stylesheet
                      (string-append (assoc-ref inputs "docbook-xsl")
                                     "/xml/xsl/docbook-xsl-"
                                     ,(package-version docbook-xsl)
                                     "/manpages/docbook.xsl")))
                 (substitute* "doc/Makefile.am"
                   (("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
                    local-stylesheet)))))
           ;; After the regular install, run the 'training' make target and
           ;; then its 'training-install' counterpart.
           (add-after 'install 'build-training
             (lambda _
               (invoke "make" "training")))
           (add-after 'build-training 'install-training
             (lambda _
               (invoke "make" "training-install"))))))
      (inputs
       `(("cairo" ,cairo)
         ("icu" ,icu4c)
         ("leptonica" ,leptonica)
         ("pango" ,pango)
         ("python-wrapper" ,python-wrapper)))
      (native-inputs
       `(("asciidoc" ,asciidoc)
         ("autoconf" ,autoconf)
         ("automake" ,automake)
         ("docbook-xsl" ,docbook-xsl)
         ("libarchive" ,libarchive)
         ("libcurl" ,curl)
         ("libtool" ,libtool)
         ("libtiff" ,libtiff)
         ("pkg-config" ,pkg-config)
         ("xsltproc" ,libxslt)))
      (home-page "https://github.com/tesseract-ocr/tesseract")
      (synopsis "Optical character recognition engine")
      (description
       "Tesseract is an optical character recognition (OCR) engine with very
high accuracy. It supports many languages, output text formatting, hOCR
positional information and page layout analysis. Several image formats are
supported through the Leptonica library. It can also detect whether text is
monospaced or proportional.")
      (license license:asl2.0))))
9a94ccec | 141 | |
fee4afd8 NG |
;; gImageReader, the Qt front-end to Tesseract, built with CMake from the
;; upstream release tarball.
(define-public gimagereader
  (package
    (name "gimagereader")
    (version "3.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/manisandro/gImageReader/releases"
             "/download/v" version "/"
             "gimagereader-" version ".tar.xz"))
       (sha256
        (base32 "1pghffb55k3wq33nbn9fi0lmjbldpmvqs2msnvss8bxz1k1ck23n"))))
    (build-system cmake-build-system)
    (inputs
     `(("enchant" ,enchant)
       ("djvulibre" ,djvulibre)
       ("leptonica" ,leptonica)
       ("podofo" ,podofo)
       ("poppler-qt5" ,poppler-qt5)
       ("sane-backends" ,sane-backends)
       ("qtbase" ,qtbase)
       ("qtspell" ,qtspell)
       ("quazip" ,quazip)
       ("tesseract" ,tesseract-ocr)))
    (native-inputs
     `(("gettext" ,gettext-minimal)
       ("intltool" ,intltool)
       ("pkg-config" ,pkg-config)))
    (arguments
     `(#:tests? #f                      ;no test
       ;; NOTE(review): presumably turns off the program's own version
       ;; check -- confirm against upstream's CMakeLists.txt.
       #:configure-flags (list "-DENABLE_VERSIONCHECK=0")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-build
           ;; XXX: Prevent compilation error: "incomplete type ‘QUrl’ used in
           ;; nested name specifier".  Fixed upstream as
           ;; 6209e25dab20b233e399ff36fabe4252db0f9e44.  It can be removed in
           ;; release 3.3.2+.
           (lambda _
             (with-directory-excursion "qt/src/hocr"
               ;; Append a <QUrl> include right after the <QMessageBox> one
               ;; in both exporters.
               (substitute* '("HOCROdtExporter.cc" "HOCRTextExporter.cc")
                 (("#include <QMessageBox>\n" include-line)
                  (string-append include-line "#include <QUrl>\n"))))
             #t)))))
    (home-page "https://github.com/manisandro/gImageReader")
    (synopsis "Qt front-end to tesseract-ocr")
    (description
     "gImageReader is a Qt front-end to Tesseract optical character
recognition (OCR) software.

gImageReader supports automatic page layout detection but the user can
also manually define and adjust the recognition regions. It is
possible to import images from disk, scanning devices, clipboard and
screenshots. gImageReader also supports multipage PDF documents.
Recognized text is displayed directly next to the image and basic text
editing including search/replace and removing of line breaks is
possible. Spellchecking for the output text is also supported if the
corresponding dictionaries are installed.")
    (license license:gpl3+)))
202 | ||
9a94ccec AV |
;; Zinnia handwriting recognition library, built from a pinned Git commit.
;; Only the "zinnia" sub-directory of the checkout is kept and built.
(define-public zinnia
  (let ((commit "581faa8f6f15e4a7b21964be3a5ec36265c80e5b")
        (revision "1"))
    (package
      (name "zinnia")
      ;; version copied from 'configure.in'
      (version (git-version "0.07" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/taku910/zinnia")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1izjy5qw6swg0rs2ym2i72zndb90mwrfbd1iv8xbpwckbm4899lg"))
         (modules '((guix build utils)
                    (ice-9 ftw)
                    (srfi srfi-26)))
         ;; Remove unnecessary files with potentially different license.
         (snippet
          '(begin
             ;; Delete every top-level entry that is not in KEEP.
             (let ((keep '("zinnia" "." "..")))
               (for-each delete-file-recursively
                         (scandir "." (negate (cut member <> keep)))))
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Instead of the standard bootstrap, enter the "zinnia"
           ;; sub-directory and make the two config files writable.
           (replace 'bootstrap
             (lambda _
               (chdir "zinnia")
               (make-file-writable "config.log")
               (make-file-writable "config.status")
               #t)))))
      (home-page "https://taku910.github.io/zinnia/")
      (synopsis "Online hand recognition system with machine learning")
      (description
       "Zinnia is a simple, customizable and portable online hand recognition
system based on Support Vector Machines. Zinnia simply receives user pen
strokes as a sequence of coordinate data and outputs n-best characters sorted
by SVM confidence. To keep portability, Zinnia doesn't have any rendering
functionality. In addition to recognition, Zinnia provides training module
that allows us to create any hand-written recognition systems with low-cost.")
      (license
       (list license:bsd-3              ; all files except...
             (license:non-copyleft      ; some autotools related files
              "file://zinnia/aclocal.m4")
             license:x11                ; 'install-sh'
             license:public-domain))))) ; 'install-sh'
16dd1fd0 AV |
255 | |
;;; Python 2 bindings for Zinnia, licensed under the same terms as zinnia.
;;; Everything not overridden below is inherited from the 'zinnia' package.
(define-public python2-zinnia
  (package
    (inherit zinnia)
    (name "python2-zinnia")
    (build-system python-build-system)
    (inputs
     `(("zinnia" ,zinnia)))
    (arguments
     `(#:tests? #f   ; avoid circular dependency on tegaki-zinnia-japanese
       #:python ,python-2 ; CObject API is used, it was removed in Python 3.2
       #:phases
       (modify-phases %standard-phases
         ;; Right after unpacking, move into the "zinnia/python"
         ;; sub-directory of the checkout.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "zinnia/python")
             #t)))))))
822cd628 | 273 |