gnu: emacs-helm: Update to 3.8.7.
[jackhill/guix/guix.git] / gnu / packages / ocr.scm
CommitLineData
f99f6fd6
LC
1;;; GNU Guix --- Functional package management for GNU
2;;; Copyright © 2013 Ludovic Courtès <ludo@gnu.org>
04950d1f 3;;; Copyright © 2016, 2020 Efraim Flashner <efraim@flashner.co.il>
ff8a66bc 4;;; Copyright © 2019 Tobias Geerinckx-Rice <me@tobias.gr>
9a94ccec 5;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
822cd628 6;;; Copyright © 2021 Andy Tai <atai@atai.org>
b75b2fc3 7;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr>
f99f6fd6
LC
8;;;
9;;; This file is part of GNU Guix.
10;;;
11;;; GNU Guix is free software; you can redistribute it and/or modify it
12;;; under the terms of the GNU General Public License as published by
13;;; the Free Software Foundation; either version 3 of the License, or (at
14;;; your option) any later version.
15;;;
16;;; GNU Guix is distributed in the hope that it will be useful, but
17;;; WITHOUT ANY WARRANTY; without even the implied warranty of
18;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;;; GNU General Public License for more details.
20;;;
21;;; You should have received a copy of the GNU General Public License
22;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
23
9e617a54 24(define-module (gnu packages ocr)
d814be32 25 #:use-module ((guix licenses) #:prefix license:)
f99f6fd6
LC
26 #:use-module (guix packages)
27 #:use-module (guix download)
fda21800 28 #:use-module (guix gexp)
9a94ccec 29 #:use-module (guix git-download)
fee4afd8 30 #:use-module (guix build-system cmake)
f99f6fd6 31 #:use-module (guix build-system gnu)
16dd1fd0 32 #:use-module (guix build-system python)
822cd628
T
33 #:use-module (gnu packages)
34 #:use-module (gnu packages autotools)
35 #:use-module (gnu packages backup)
36 #:use-module (gnu packages check)
d814be32 37 #:use-module (gnu packages compression)
822cd628 38 #:use-module (gnu packages curl)
fee4afd8 39 #:use-module (gnu packages djvu)
822cd628
T
40 #:use-module (gnu packages docbook)
41 #:use-module (gnu packages documentation)
fee4afd8
NG
42 #:use-module (gnu packages enchant)
43 #:use-module (gnu packages gettext)
44 #:use-module (gnu packages glib)
822cd628
T
45 #:use-module (gnu packages gtk)
46 #:use-module (gnu packages icu4c)
fee4afd8 47 #:use-module (gnu packages pdf)
822cd628 48 #:use-module (gnu packages pkg-config)
16dd1fd0 49 #:use-module (gnu packages python)
fee4afd8
NG
50 #:use-module (gnu packages qt)
51 #:use-module (gnu packages scanner)
822cd628 52 #:use-module (gnu packages xml)
427dda54 53 #:use-module (gnu packages image))
f99f6fd6
LC
54
;; GNU Ocrad: fetched as a release tarball from the GNU mirrors and built with
;; the GNU build system.
(define-public ocrad
  (package
    (name "ocrad")
    (version "0.27")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://gnu/ocrad/ocrad-" version ".tar.lz"))
       (sha256
        (base32 "0divffvcaim89g4pvqs8kslbcxi475bcl3b4ynphf284k9zfdgx9"))))
    (build-system gnu-build-system)
    ;; The release is published as a ".tar.lz" archive; lzip is presumably
    ;; what unpacks it at build time.
    (native-inputs (list lzip))
    (home-page "https://www.gnu.org/software/ocrad/")
    (synopsis "Optical character recognition based on feature extraction")
    (description
     "GNU Ocrad is an optical character recognition program based on a
feature extraction method. It can read images in PBM, PGM or PPM formats and
it produces text in 8-bit or UTF-8 formats.")
    (license license:gpl3+)))
75
;; Tesseract OCR engine, packaged from a Git snapshot versioned on top of
;; 4.1.1 rather than from a release tarball.
(define-public tesseract-ocr
  ;; There are useful commits beyond the last official stable release.
  (let ((revision "1")
        (commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df"))
    (package
      (name "tesseract-ocr")
      (version (git-version "4.1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tesseract-ocr/tesseract")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; Tests currently result in a segfault
         ;; Tell 'configure' where the Leptonica headers are installed.
         #:configure-flags
         (list (string-append "LIBLEPT_HEADERSDIR="
                              (assoc-ref %build-inputs "leptonica")
                              "/include"))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-docbook
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Don't attempt to download XSL schema: point the
               ;; documentation Makefile at the stylesheet shipped by the
               ;; "docbook-xsl" input instead.
               (substitute* "doc/Makefile.am"
                 (("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
                  (string-append (assoc-ref inputs "docbook-xsl")
                                 "/xml/xsl/docbook-xsl-"
                                 ,(package-version docbook-xsl)
                                 "/manpages/docbook.xsl")))))
           ;; The training targets are run as extra phases after 'install':
           ;; first build them, then install them.
           (add-after 'install 'build-training
             (lambda _
               (invoke "make" "training")))
           (add-after 'build-training 'install-training
             (lambda _
               (invoke "make" "training-install"))))))
      (native-inputs
       `(("asciidoc" ,asciidoc)
         ("autoconf" ,autoconf)
         ("automake" ,automake)
         ("docbook-xsl" ,docbook-xsl)
         ("libarchive" ,libarchive)
         ("libcurl" ,curl)
         ("libtool" ,libtool)
         ("libtiff" ,libtiff)
         ("pkg-config" ,pkg-config)
         ("xsltproc" ,libxslt)))
      (inputs
       `(("cairo" ,cairo)
         ("icu" ,icu4c)
         ("leptonica" ,leptonica)
         ("pango" ,pango)
         ("python-wrapper" ,python-wrapper)))
      (home-page "https://github.com/tesseract-ocr/tesseract")
      (synopsis "Optical character recognition engine")
      (description
       "Tesseract is an optical character recognition (OCR) engine with very
high accuracy. It supports many languages, output text formatting, hOCR
positional information and page layout analysis. Several image formats are
supported through the Leptonica library. It can also detect whether text is
monospaced or proportional.")
      (license license:asl2.0))))
9a94ccec 142
fee4afd8
NG
;; gImageReader: graphical front-end to Tesseract, built with CMake from the
;; upstream release tarball.
(define-public gimagereader
  (package
    (name "gimagereader")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/manisandro/gImageReader/releases/download/v"
             version "/gimagereader-" version ".tar.xz"))
       (sha256
        (base32 "09glxh7b4ivrd4samm67b8k2p0aljiagr83wb8nvy5ps2a9gwp5m"))))
    (build-system cmake-build-system)
    (arguments
     (list
      ;; Pass the version-check switch as 0 to CMake.
      #:configure-flags #~(list "-DENABLE_VERSIONCHECK=0")
      #:tests? #f))                     ;no test
    (native-inputs
     (list gettext-minimal
           intltool
           pkg-config))
    (inputs
     (list enchant
           djvulibre
           leptonica
           podofo
           poppler-qt5
           sane-backends
           qtbase-5
           qtspell
           quazip-0
           tesseract-ocr))
    (home-page "https://github.com/manisandro/gImageReader")
    (synopsis "Qt front-end to tesseract-ocr")
    (description
     "gImageReader is a Qt front-end to Tesseract optical character
recognition (OCR) software.

gImageReader supports automatic page layout detection but the user can
also manually define and adjust the recognition regions. It is
possible to import images from disk, scanning devices, clipboard and
screenshots. gImageReader also supports multipage PDF documents.
Recognized text is displayed directly next to the image and basic text
editing including search/replace and removing of line breaks is
possible. Spellchecking for the output text is also supported if the
corresponding dictionaries are installed.")
    (license license:gpl3+)))
189
9a94ccec
AV
;; Zinnia: packaged from a Git snapshot; only the "zinnia" subdirectory of the
;; checkout is kept and built.
(define-public zinnia
  (let* ((commit "581faa8f6f15e4a7b21964be3a5ec36265c80e5b")
         (revision "1")
         ;; version copied from 'configure.in'
         (version (git-version "0.07" revision commit)))
    (package
      (name "zinnia")
      (version version)
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/taku910/zinnia")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1izjy5qw6swg0rs2ym2i72zndb90mwrfbd1iv8xbpwckbm4899lg"))
         (modules '((guix build utils)
                    (ice-9 ftw)
                    (srfi srfi-26)))
         (snippet
          ;; Remove unnecessary files with potentially different license:
          ;; every top-level entry except "zinnia" (and the "." / ".."
          ;; pseudo-entries) is deleted.
          '(begin
             (let ((keep '("zinnia" "." "..")))
               (for-each delete-file-recursively
                         (scandir "." (negate (cut member <> keep)))))
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Stand-in for the stock 'bootstrap phase: enter the "zinnia"
           ;; subdirectory (the only top-level entry the snippet keeps) and
           ;; make the two listed files writable.
           (replace 'bootstrap
             (lambda _
               (chdir "zinnia")
               (for-each make-file-writable
                         (list "config.log" "config.status"))
               #t)))))
      (home-page "https://taku910.github.io/zinnia/")
      (synopsis "Online hand recognition system with machine learning")
      (description
       "Zinnia is a simple, customizable and portable online hand recognition
system based on Support Vector Machines. Zinnia simply receives user pen
strokes as a sequence of coordinate data and outputs n-best characters sorted
by SVM confidence. To keep portability, Zinnia doesn't have any rendering
functionality. In addition to recognition, Zinnia provides training module
that allows us to create any hand-written recognition systems with low-cost.")
      (license (list license:bsd-3      ; all files except...
                     (license:non-copyleft ; some autotools related files
                      "file://zinnia/aclocal.m4")
                     license:x11        ; 'install-sh'
                     license:public-domain))))) ; 'install-sh'
16dd1fd0 242
822cd628 243