Commit | Line | Data |
---|---|---|
f99f6fd6 LC |
1 | ;;; GNU Guix --- Functional package management for GNU |
2 | ;;; Copyright © 2013 Ludovic Courtès <ludo@gnu.org> | |
04950d1f | 3 | ;;; Copyright © 2016, 2020 Efraim Flashner <efraim@flashner.co.il> |
ff8a66bc | 4 | ;;; Copyright © 2019 Tobias Geerinckx-Rice <me@tobias.gr> |
9a94ccec | 5 | ;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com> |
822cd628 | 6 | ;;; Copyright © 2021 Andy Tai <atai@atai.org> |
b75b2fc3 | 7 | ;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr> |
f99f6fd6 LC |
8 | ;;; |
9 | ;;; This file is part of GNU Guix. | |
10 | ;;; | |
11 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
12 | ;;; under the terms of the GNU General Public License as published by | |
13 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
14 | ;;; your option) any later version. | |
15 | ;;; | |
16 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
17 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;;; GNU General Public License for more details. | |
20 | ;;; | |
21 | ;;; You should have received a copy of the GNU General Public License | |
22 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
23 | ||
9e617a54 | 24 | (define-module (gnu packages ocr) |
d814be32 | 25 | #:use-module ((guix licenses) #:prefix license:) |
f99f6fd6 LC |
26 | #:use-module (guix packages) |
27 | #:use-module (guix download) | |
fda21800 | 28 | #:use-module (guix gexp) |
9a94ccec | 29 | #:use-module (guix git-download) |
fee4afd8 | 30 | #:use-module (guix build-system cmake) |
f99f6fd6 | 31 | #:use-module (guix build-system gnu) |
16dd1fd0 | 32 | #:use-module (guix build-system python) |
822cd628 T |
33 | #:use-module (gnu packages) |
34 | #:use-module (gnu packages autotools) | |
35 | #:use-module (gnu packages backup) | |
36 | #:use-module (gnu packages check) | |
d814be32 | 37 | #:use-module (gnu packages compression) |
822cd628 | 38 | #:use-module (gnu packages curl) |
fee4afd8 | 39 | #:use-module (gnu packages djvu) |
822cd628 T |
40 | #:use-module (gnu packages docbook) |
41 | #:use-module (gnu packages documentation) | |
fee4afd8 NG |
42 | #:use-module (gnu packages enchant) |
43 | #:use-module (gnu packages gettext) | |
44 | #:use-module (gnu packages glib) | |
822cd628 T |
45 | #:use-module (gnu packages gtk) |
46 | #:use-module (gnu packages icu4c) | |
fee4afd8 | 47 | #:use-module (gnu packages pdf) |
822cd628 | 48 | #:use-module (gnu packages pkg-config) |
16dd1fd0 | 49 | #:use-module (gnu packages python) |
fee4afd8 NG |
50 | #:use-module (gnu packages qt) |
51 | #:use-module (gnu packages scanner) | |
822cd628 | 52 | #:use-module (gnu packages xml) |
427dda54 | 53 | #:use-module (gnu packages image)) |
f99f6fd6 LC |
54 | |
;; GNU Ocrad, fetched as a release tarball from the GNU mirrors and built
;; with the standard GNU build system.
(define-public ocrad
  (package
    (name "ocrad")
    (version "0.27")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://gnu/ocrad/ocrad-" version ".tar.lz"))
       (sha256
        (base32 "0divffvcaim89g4pvqs8kslbcxi475bcl3b4ynphf284k9zfdgx9"))))
    (build-system gnu-build-system)
    ;; The source is a '.tar.lz' archive; lzip is presumably what unpacks it.
    (native-inputs
     (list lzip))
    (home-page "https://www.gnu.org/software/ocrad/")
    (synopsis "Optical character recognition based on feature extraction")
    (description
     "GNU Ocrad is an optical character recognition program based on a
feature extraction method. It can read images in PBM, PGM or PPM formats and
it produces text in 8-bit or UTF-8 formats.")
    (license license:gpl3+)))
75 | ||
;; Tesseract OCR engine, built from a pinned upstream Git commit.  Besides the
;; standard build, two extra phases build and install the "training" targets.
(define-public tesseract-ocr
  ;; There are useful commits beyond the last official stable release.
  (let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df")
        (revision "1"))
    (package
      (name "tesseract-ocr")
      (version (git-version "4.1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tesseract-ocr/tesseract")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
      (build-system gnu-build-system)
      (arguments
       (list
        #:configure-flags
        ;; Point 'configure' at the Leptonica headers explicitly.
        #~(list (string-append "LIBLEPT_HEADERSDIR="
                               #$(this-package-input "leptonica")
                               "/include"))
        #:tests? #f                     ;tests currently result in a segfault
        #:phases
        #~(modify-phases %standard-phases
            (add-after 'unpack 'fix-docbook
              (lambda _
                ;; Don't attempt to download XSL schema.  The stylesheet is
                ;; a native input, so refer to it with '#+' rather than
                ;; looking it up in INPUTS, which does not contain native
                ;; inputs when cross-compiling.
                (substitute* "doc/Makefile.am"
                  (("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
                   (string-append
                    #+(this-package-native-input "docbook-xsl")
                    "/xml/xsl/docbook-xsl-"
                    #$(package-version
                       (this-package-native-input "docbook-xsl"))
                    "/manpages/docbook.xsl")))))
            ;; The training tools are separate make targets; build and
            ;; install them once the main installation is done.
            (add-after 'install 'build-training
              (lambda _
                (invoke "make" "training")))
            (add-after 'build-training 'install-training
              (lambda _
                (invoke "make" "training-install"))))))
      (inputs
       (list cairo
             icu4c
             leptonica
             pango
             python-wrapper))
      ;; NOTE(review): libarchive, curl and libtiff look like libraries rather
      ;; than build tools; confirm whether they belong in 'inputs' instead.
      (native-inputs
       (list asciidoc
             autoconf
             automake
             docbook-xsl
             libarchive
             curl
             libtool
             libtiff
             pkg-config
             libxslt))
      (home-page "https://github.com/tesseract-ocr/tesseract")
      (synopsis "Optical character recognition engine")
      (description
       "Tesseract is an optical character recognition (OCR) engine with very
high accuracy. It supports many languages, output text formatting, hOCR
positional information and page layout analysis. Several image formats are
supported through the Leptonica library. It can also detect whether text is
monospaced or proportional.")
      (license license:asl2.0))))
9a94ccec | 142 | |
fee4afd8 NG |
;; gImageReader, the Qt front-end to tesseract-ocr, built with CMake from the
;; upstream release tarball.
(define-public gimagereader
  (package
    (name "gimagereader")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/manisandro/gImageReader/releases"
                           "/download/v" version "/"
                           "gimagereader-" version ".tar.xz"))
       (sha256
        (base32
         "09glxh7b4ivrd4samm67b8k2p0aljiagr83wb8nvy5ps2a9gwp5m"))))
    (build-system cmake-build-system)
    (arguments
     (list #:tests? #f                  ;no test
           ;; Presumably turns off upstream's check for newer releases.
           #:configure-flags
           #~(list "-DENABLE_VERSIONCHECK=0")))
    (native-inputs
     (list gettext-minimal
           intltool
           pkg-config))
    (inputs
     (list enchant
           djvulibre
           leptonica
           podofo
           poppler-qt5
           sane-backends
           qtbase-5
           qtspell
           quazip-0
           tesseract-ocr))
    (home-page "https://github.com/manisandro/gImageReader")
    (synopsis "Qt front-end to tesseract-ocr")
    (description
     "gImageReader is a Qt front-end to Tesseract optical character
recognition (OCR) software.

gImageReader supports automatic page layout detection but the user can
also manually define and adjust the recognition regions. It is
possible to import images from disk, scanning devices, clipboard and
screenshots. gImageReader also supports multipage PDF documents.
Recognized text is displayed directly next to the image and basic text
editing including search/replace and removing of line breaks is
possible. Spellchecking for the output text is also supported if the
corresponding dictionaries are installed.")
    (license license:gpl3+)))
189 | ||
9a94ccec AV |
;; Zinnia handwriting recognition library, built from a pinned Git commit.
;; Only the "zinnia" sub-directory of the repository is kept and built.
(define-public zinnia
  (let* ((commit "581faa8f6f15e4a7b21964be3a5ec36265c80e5b")
         (revision "1")
         ;; version copied from 'configure.in'
         (version (git-version "0.07" revision commit)))
    (package
      (name "zinnia")
      (version version)
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/taku910/zinnia")
               (commit commit)))
         (sha256
          (base32
           "1izjy5qw6swg0rs2ym2i72zndb90mwrfbd1iv8xbpwckbm4899lg"))
         (file-name (git-file-name name version))
         (modules '((guix build utils)
                    (ice-9 ftw)
                    (srfi srfi-26)))
         (snippet ; remove unnecessary files with potentially different license
          ;; Delete every top-level entry except the "zinnia" directory.
          '(for-each delete-file-recursively
                     (scandir "."
                              (negate (cut member <> '("zinnia"
                                                       "." ".."))))))))
      (build-system gnu-build-system)
      (arguments
       (list
        #:phases
        #~(modify-phases %standard-phases
            (replace 'bootstrap
              (lambda _
                ;; Build from the "zinnia" sub-directory; the listed files
                ;; are made writable before the remaining phases run.
                (chdir "zinnia")
                (for-each make-file-writable
                          '("config.log" "config.status")))))))
      (home-page "https://taku910.github.io/zinnia/")
      (synopsis "Online handwriting recognition system with machine learning")
      (description
       "Zinnia is a simple, customizable and portable online handwriting
recognition system based on Support Vector Machines.  Zinnia simply receives
user pen strokes as a sequence of coordinate data and outputs n-best
characters sorted by SVM confidence.  To keep portability, Zinnia doesn't have
any rendering functionality.  In addition to recognition, Zinnia provides a
training module that allows creating handwriting recognition systems at low
cost.")
      ;; NOTE(review): the last two entries are both annotated 'install-sh';
      ;; confirm which file each license actually applies to.
      (license (list license:bsd-3 ; all files except...
                     (license:non-copyleft ; some autotools related files
                      "file://zinnia/aclocal.m4")
                     license:x11 ; 'install-sh'
                     license:public-domain))))) ; 'install-sh'
16dd1fd0 | 242 | |
822cd628 | 243 |