;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2013 Ludovic Courtès <ludo@gnu.org>
-;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
+;;; Copyright © 2016, 2020 Efraim Flashner <efraim@flashner.co.il>
+;;; Copyright © 2019 Tobias Geerinckx-Rice <me@tobias.gr>
+;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
+;;; Copyright © 2021 Andy Tai <atai@atai.org>
+;;; Copyright © 2021 Nicolas Goaziou <mail@nicolasgoaziou.fr>
;;;
;;; This file is part of GNU Guix.
;;;
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix packages)
#:use-module (guix download)
+ #:use-module (guix git-download)
+ #:use-module (guix build-system cmake)
#:use-module (guix build-system gnu)
+ #:use-module (guix build-system python)
+ #:use-module (gnu packages)
+ #:use-module (gnu packages autotools)
+ #:use-module (gnu packages backup)
+ #:use-module (gnu packages check)
#:use-module (gnu packages compression)
+ #:use-module (gnu packages curl)
+ #:use-module (gnu packages djvu)
+ #:use-module (gnu packages docbook)
+ #:use-module (gnu packages documentation)
+ #:use-module (gnu packages enchant)
+ #:use-module (gnu packages gettext)
+ #:use-module (gnu packages glib)
+ #:use-module (gnu packages gtk)
+ #:use-module (gnu packages icu4c)
+ #:use-module (gnu packages pdf)
+ #:use-module (gnu packages pkg-config)
+ #:use-module (gnu packages python)
+ #:use-module (gnu packages qt)
+ #:use-module (gnu packages scanner)
+ #:use-module (gnu packages xml)
#:use-module (gnu packages image))
(define-public ocrad
(package
(name "ocrad")
- (version "0.25")
+ (version "0.27")
(source (origin
(method url-fetch)
(uri (string-append "mirror://gnu/ocrad/ocrad-"
version ".tar.lz"))
(sha256
(base32
- "1m2dblgvvjs48rsglfdwq0ib9zk8h9n34xsh67ibrg0g0ffbw477"))))
+ "0divffvcaim89g4pvqs8kslbcxi475bcl3b4ynphf284k9zfdgx9"))))
(build-system gnu-build-system)
(native-inputs `(("lzip" ,lzip)))
- (home-page "http://www.gnu.org/software/ocrad/")
+ (home-page "https://www.gnu.org/software/ocrad/")
(synopsis "Optical character recognition based on feature extraction")
(description
"GNU Ocrad is an optical character recognition program based on a
(license license:gpl3+)))
(define-public tesseract-ocr
+ ;; There are useful commits beyond the last official stable release.
+ (let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df")
+ (revision "1"))
+ (package
+ (name "tesseract-ocr")
+ (version (git-version "4.1.1" revision commit))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/tesseract-ocr/tesseract")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
+ (build-system gnu-build-system)
+ (inputs
+ `(("cairo" ,cairo)
+ ("icu" ,icu4c)
+ ("leptonica" ,leptonica)
+ ("pango" ,pango)
+ ("python-wrapper" ,python-wrapper)))
+ (native-inputs
+ `(("asciidoc" ,asciidoc)
+ ("autoconf" ,autoconf)
+ ("automake" ,automake)
+ ("docbook-xsl" ,docbook-xsl)
+ ("libarchive" ,libarchive)
+ ("libcurl" ,curl)
+ ("libtool" ,libtool)
+ ("libtiff" ,libtiff)
+ ("pkg-config" ,pkg-config)
+ ("xsltproc" ,libxslt)))
+ (arguments
+ `(#:configure-flags
+ (let ((leptonica (assoc-ref %build-inputs "leptonica")))
+ (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))
+ #:tests? #f ; Tests currently result in a segfault
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'fix-docbook
+ (lambda* (#:key inputs #:allow-other-keys)
+ ;; Don't attempt to download XSL schema.
+ (substitute* "doc/Makefile.am"
+ (("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
+ (string-append (assoc-ref inputs "docbook-xsl")
+ "/xml/xsl/docbook-xsl-"
+ ,(package-version docbook-xsl)
+ "/manpages/docbook.xsl")))))
+ (add-after 'install 'build-training
+ (lambda _
+ (invoke "make" "training")))
+ (add-after 'build-training 'install-training
+ (lambda _
+ (invoke "make" "training-install"))))))
+ (home-page "https://github.com/tesseract-ocr/tesseract")
+ (synopsis "Optical character recognition engine")
+ (description
+ "Tesseract is an optical character recognition (OCR) engine with very
+high accuracy. It supports many languages, output text formatting, hOCR
+positional information and page layout analysis. Several image formats are
+supported through the Leptonica library. It can also detect whether text is
+monospaced or proportional.")
+ (license license:asl2.0))))
+
+(define-public gimagereader
(package
- (name "tesseract-ocr")
- (version "3.04.01")
+ (name "gimagereader")
+ (version "3.3.1")
(source
(origin
(method url-fetch)
(uri (string-append
- "https://github.com/tesseract-ocr/tesseract/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
+ "https://github.com/manisandro/gImageReader/releases"
+ "/download/v" version "/"
+ "gimagereader-" version ".tar.xz"))
(sha256
- (base32 "0snwd8as5i8vx7zkimpd2yg898jl96zf90r65a9w615f2hdkxxjp"))))
- (build-system gnu-build-system)
- (inputs
- `(("leptonica" ,leptonica)))
+ (base32 "1pghffb55k3wq33nbn9fi0lmjbldpmvqs2msnvss8bxz1k1ck23n"))))
+ (build-system cmake-build-system)
(arguments
- '(#:configure-flags
- (let ((leptonica (assoc-ref %build-inputs "leptonica")))
- (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))))
- (home-page "https://github.com/tesseract-ocr")
- (synopsis "Optical character recognition engine")
+ `(#:tests? #f ;no test
+ #:configure-flags (list "-DENABLE_VERSIONCHECK=0")
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'fix-build
+ ;; XXX: Prevent compilation error: "incomplete type ‘QUrl’ used in
+ ;; nested name specifier". Fixed upstream as
+ ;; 6209e25dab20b233e399ff36fabe4252db0f9e44. It can be removed in
+ ;; release 3.3.2+.
+ (lambda _
+ (with-directory-excursion "qt/src/hocr"
+ (substitute* '("HOCROdtExporter.cc" "HOCRTextExporter.cc")
+ (("#include <QMessageBox>\n" all)
+ (string-append all "#include <QUrl>\n"))))
+ #t)))))
+ (native-inputs
+ `(("gettext" ,gettext-minimal)
+ ("intltool" ,intltool)
+ ("pkg-config" ,pkg-config)))
+ (inputs
+ `(("enchant" ,enchant)
+ ("djvulibre" ,djvulibre)
+ ("leptonica" ,leptonica)
+ ("podofo" ,podofo)
+ ("poppler-qt5" ,poppler-qt5)
+ ("sane-backends" ,sane-backends)
+ ("qtbase" ,qtbase-5)
+ ("qtspell" ,qtspell)
+ ("quazip" ,quazip)
+ ("tesseract" ,tesseract-ocr)))
+ (home-page "https://github.com/manisandro/gImageReader")
+ (synopsis "Qt front-end to tesseract-ocr")
(description
- "Tesseract is an optical character recognition (OCR) engine with very
-high accuracy. It supports many languages, output text formatting, hOCR
-positional information and page layout analysis. Several image formats are
-supported through the Leptonica library. It can also detect whether text is
-monospaced or proportional.")
- (license license:asl2.0)))
+ "gImageReader is a Qt front-end to Tesseract optical character
+recognition (OCR) software.
+
+gImageReader supports automatic page layout detection but the user can
+also manually define and adjust the recognition regions. It is
+possible to import images from disk, scanning devices, clipboard and
+screenshots. gImageReader also supports multipage PDF documents.
+Recognized text is displayed directly next to the image and basic text
+editing including search/replace and removing of line breaks is
+possible. Spellchecking for the output text is also supported if the
+corresponding dictionaries are installed.")
+ (license license:gpl3+)))
+
+(define-public zinnia
+ (let* ((commit "581faa8f6f15e4a7b21964be3a5ec36265c80e5b")
+ (revision "1")
+ ;; version copied from 'configure.in'
+ (version (git-version "0.07" revision commit)))
+ (package
+ (name "zinnia")
+ (version version)
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/taku910/zinnia")
+ (commit commit)))
+ (sha256
+ (base32
+ "1izjy5qw6swg0rs2ym2i72zndb90mwrfbd1iv8xbpwckbm4899lg"))
+ (file-name (git-file-name name version))
+ (modules '((guix build utils)
+ (ice-9 ftw)
+ (srfi srfi-26)))
+ (snippet ; remove unnecessary files with potentially different license
+ '(begin
+ (for-each delete-file-recursively
+ (scandir "."
+ (negate (cut member <> '("zinnia"
+ "." "..")))))
+ #t))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (replace 'bootstrap
+ (lambda _
+ (chdir "zinnia")
+ (for-each make-file-writable
+ '("config.log" "config.status"))
+ #t)))))
+ (home-page "https://taku910.github.io/zinnia/")
+ (synopsis "Online hand recognition system with machine learning")
+ (description
+ "Zinnia is a simple, customizable and portable online hand recognition
+system based on Support Vector Machines. Zinnia simply receives user pen
+strokes as a sequence of coordinate data and outputs n-best characters sorted
+by SVM confidence. To keep portability, Zinnia doesn't have any rendering
+functionality. In addition to recognition, Zinnia provides training module
+that allows us to create any hand-written recognition systems with low-cost.")
+ (license (list license:bsd-3 ; all files except...
+ (license:non-copyleft ; some autotools related files
+ "file://zinnia/aclocal.m4")
+ license:x11 ; 'install-sh'
+ license:public-domain))))) ; 'install-sh'
+
+;;; python 2 bindings, license under the same terms as zinnia
+(define-public python2-zinnia
+ (package
+ (inherit zinnia)
+ (name "python2-zinnia")
+ (build-system python-build-system)
+ (arguments
+ `(#:python ,python-2 ; CObject API is used, it was removed in Python 3.2
+ #:tests? #f ; avoid circular dependency on tegaki-zinnia-japanese
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'chdir
+ (lambda _
+ (chdir "zinnia/python")
+ #t)))))
+ (inputs
+ `(("zinnia" ,zinnia)))))
+