gnu: tesseract-ocr: Update to 5.2.0.
* gnu/packages/ocr.scm (tesseract-ocr): Update to 5.2.0. [inputs, native-inputs]: Move after arguments. Use new style inputs. [arguments]: Use gexps. [configure-flags]: Add --disable-static. [phases]{fix-docbook}: Replace phase with... {do-not-override-xml-catalog-files}: ... this new phase. {build-training}: Move after build phase. Enable parallel build. {install-training}: Move after install phase. [native-inputs]: Add libxml2.
parent
887dbf4d80
commit
f7c027617d
|
@ -5,6 +5,7 @@
|
|||
;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
|
||||
;;; Copyright © 2021 Andy Tai <atai@atai.org>
|
||||
;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr>
|
||||
;;; Copyright © 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com>
|
||||
;;;
|
||||
;;; This file is part of GNU Guix.
|
||||
;;;
|
||||
|
@ -74,71 +75,72 @@ it produces text in 8-bit or UTF-8 formats.")
|
|||
(license license:gpl3+)))
|
||||
|
||||
(define-public tesseract-ocr
|
||||
;; There are useful commits beyond the last official stable release.
|
||||
(let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df")
|
||||
(revision "1"))
|
||||
(package
|
||||
(name "tesseract-ocr")
|
||||
(version (git-version "4.1.1" revision commit))
|
||||
(source
|
||||
(origin
|
||||
(method git-fetch)
|
||||
(uri (git-reference
|
||||
(url "https://github.com/tesseract-ocr/tesseract")
|
||||
(commit commit)))
|
||||
(file-name (git-file-name name version))
|
||||
(sha256
|
||||
(base32
|
||||
"11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
|
||||
(build-system gnu-build-system)
|
||||
(inputs
|
||||
`(("cairo" ,cairo)
|
||||
("icu" ,icu4c)
|
||||
("leptonica" ,leptonica)
|
||||
("pango" ,pango)
|
||||
("python-wrapper" ,python-wrapper)))
|
||||
(native-inputs
|
||||
`(("asciidoc" ,asciidoc)
|
||||
("autoconf" ,autoconf)
|
||||
("automake" ,automake)
|
||||
("docbook-xsl" ,docbook-xsl)
|
||||
("libarchive" ,libarchive)
|
||||
("libcurl" ,curl)
|
||||
("libtool" ,libtool)
|
||||
("libtiff" ,libtiff)
|
||||
("pkg-config" ,pkg-config)
|
||||
("xsltproc" ,libxslt)))
|
||||
(arguments
|
||||
`(#:configure-flags
|
||||
(let ((leptonica (assoc-ref %build-inputs "leptonica")))
|
||||
(list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))
|
||||
#:tests? #f ; Tests currently result in a segfault
|
||||
#:phases
|
||||
(modify-phases %standard-phases
|
||||
(add-after 'unpack 'fix-docbook
|
||||
(lambda* (#:key inputs #:allow-other-keys)
|
||||
;; Don't attempt to download XSL schema.
|
||||
(substitute* "doc/Makefile.am"
|
||||
(("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
|
||||
(string-append (assoc-ref inputs "docbook-xsl")
|
||||
"/xml/xsl/docbook-xsl-"
|
||||
,(package-version docbook-xsl)
|
||||
"/manpages/docbook.xsl")))))
|
||||
(add-after 'install 'build-training
|
||||
(lambda _
|
||||
(invoke "make" "training")))
|
||||
(add-after 'build-training 'install-training
|
||||
(lambda _
|
||||
(invoke "make" "training-install"))))))
|
||||
(home-page "https://github.com/tesseract-ocr/tesseract")
|
||||
(synopsis "Optical character recognition engine")
|
||||
(description
|
||||
"Tesseract is an optical character recognition (OCR) engine with very
|
||||
(package
|
||||
(name "tesseract-ocr")
|
||||
(version "5.2.0")
|
||||
(source
|
||||
(origin
|
||||
(method git-fetch)
|
||||
(uri (git-reference
|
||||
(url "https://github.com/tesseract-ocr/tesseract")
|
||||
(commit version)))
|
||||
(file-name (git-file-name name version))
|
||||
(sha256
|
||||
(base32
|
||||
"0dai539h07lqj8lyhznd3wbwdpqr78qrsczq78rsmsryqvmdbyaa"))))
|
||||
(build-system gnu-build-system)
|
||||
(arguments
|
||||
(list
|
||||
#:configure-flags
|
||||
#~(list (string-append "LIBLEPT_HEADERSDIR="
|
||||
#$(this-package-input "leptonica") "/include")
|
||||
"--disable-static") ;avoid 6 MiB static archive
|
||||
;; The unit tests are disabled because they require building bundled
|
||||
;; third party libraries.
|
||||
#:tests? #f
|
||||
#:phases
|
||||
#~(modify-phases %standard-phases
|
||||
(add-after 'unpack 'do-not-override-xml-catalog-files
|
||||
(lambda _
|
||||
(substitute* "configure.ac"
|
||||
(("AC_SUBST\\(\\[XML_CATALOG_FILES])")
|
||||
""))))
|
||||
(add-after 'build 'build-training
|
||||
(lambda* (#:key parallel-build? #:allow-other-keys)
|
||||
(define n (if parallel-build? (number->string
|
||||
(parallel-job-count))
|
||||
"1"))
|
||||
(invoke "make" "-j" n "training")))
|
||||
(add-after 'install 'install-training
|
||||
(lambda _
|
||||
(invoke "make" "training-install"))))))
|
||||
(native-inputs
|
||||
(list asciidoc
|
||||
autoconf
|
||||
automake
|
||||
curl
|
||||
docbook-xsl
|
||||
libarchive
|
||||
libtiff
|
||||
libtool
|
||||
libxml2 ;for XML_CATALOG_FILES
|
||||
libxslt
|
||||
pkg-config))
|
||||
(inputs
|
||||
(list cairo
|
||||
icu4c
|
||||
leptonica
|
||||
pango
|
||||
python-wrapper))
|
||||
(home-page "https://github.com/tesseract-ocr/tesseract")
|
||||
(synopsis "Optical character recognition engine")
|
||||
(description
|
||||
"Tesseract is an optical character recognition (OCR) engine with very
|
||||
high accuracy. It supports many languages, output text formatting, hOCR
|
||||
positional information and page layout analysis. Several image formats are
|
||||
supported through the Leptonica library. It can also detect whether text is
|
||||
monospaced or proportional.")
|
||||
(license license:asl2.0))))
|
||||
(license license:asl2.0)))
|
||||
|
||||
(define-public gimagereader
|
||||
(package
|
||||
|
|
Reference in New Issue