gnu: tesseract-ocr: Make the default install minimally useful.
Fixes <https://issues.guix.gnu.org/47536>.

* gnu/packages/ocr.scm (tesseract-ocr)
[phases]{adjust-TESSDATA_PREFIX-macro}: New phase.
{install-minimal-tessdata}: New phase.
[native-inputs]: Add tesseract-ocr-tessdata-fast.
[native-search-paths]: New field.
[description]: Mention how to add support for more languages.
This commit is contained in:
parent
a6b6b0e89e
commit
ff0600c5ef
1 changed file with 30 additions and 3 deletions
|
@@ -132,6 +132,15 @@ models for the Tesseract OCR Engine.")
|
||||||
(substitute* "configure.ac"
|
(substitute* "configure.ac"
|
||||||
(("AC_SUBST\\(\\[XML_CATALOG_FILES])")
|
(("AC_SUBST\\(\\[XML_CATALOG_FILES])")
|
||||||
""))))
|
""))))
|
||||||
|
(add-after 'unpack 'adjust-TESSDATA_PREFIX-macro
|
||||||
|
(lambda _
|
||||||
|
;; Use a deeper TESSDATA_PREFIX hierarchy so that a more
|
||||||
|
;; specific search-path than '/share' can be specified. The
|
||||||
|
;; build system uses CPPFLAGS for itself, so we can't simply set
|
||||||
|
;; a make flag.
|
||||||
|
(substitute* "Makefile.am"
|
||||||
|
(("-DTESSDATA_PREFIX='\"@datadir@\"'")
|
||||||
|
"-DTESSDATA_PREFIX='\"@datadir@/tesseract-ocr\"'"))))
|
||||||
(add-after 'build 'build-training
|
(add-after 'build 'build-training
|
||||||
(lambda* (#:key parallel-build? #:allow-other-keys)
|
(lambda* (#:key parallel-build? #:allow-other-keys)
|
||||||
(define n (if parallel-build? (number->string
|
(define n (if parallel-build? (number->string
|
||||||
|
@@ -140,7 +149,18 @@ models for the Tesseract OCR Engine.")
|
||||||
(invoke "make" "-j" n "training")))
|
(invoke "make" "-j" n "training")))
|
||||||
(add-after 'install 'install-training
|
(add-after 'install 'install-training
|
||||||
(lambda _
|
(lambda _
|
||||||
(invoke "make" "training-install"))))))
|
(invoke "make" "training-install")))
|
||||||
|
(add-after 'install 'install-minimal-tessdata
|
||||||
|
;; tesseract-ocr cannot be used without its trained models data;
|
||||||
|
;; install the English language as a minimal base which can be
|
||||||
|
;; extended via TESSDATA_PREFIX.
|
||||||
|
(lambda* (#:key native-inputs inputs #:allow-other-keys)
|
||||||
|
(define eng.traineddata
|
||||||
|
"/share/tesseract-ocr/tessdata/eng.traineddata")
|
||||||
|
(install-file (search-input-file (or native-inputs inputs)
|
||||||
|
eng.traineddata)
|
||||||
|
(dirname (string-append #$output
|
||||||
|
eng.traineddata))))))))
|
||||||
(native-inputs
|
(native-inputs
|
||||||
(list asciidoc
|
(list asciidoc
|
||||||
autoconf
|
autoconf
|
||||||
|
@@ -152,13 +172,18 @@ models for the Tesseract OCR Engine.")
|
||||||
libtool
|
libtool
|
||||||
libxml2 ;for XML_CATALOG_FILES
|
libxml2 ;for XML_CATALOG_FILES
|
||||||
libxslt
|
libxslt
|
||||||
pkg-config))
|
pkg-config
|
||||||
|
tesseract-ocr-tessdata-fast))
|
||||||
(inputs
|
(inputs
|
||||||
(list cairo
|
(list cairo
|
||||||
icu4c
|
icu4c
|
||||||
leptonica
|
leptonica
|
||||||
pango
|
pango
|
||||||
python-wrapper))
|
python-wrapper))
|
||||||
|
(native-search-paths (list (search-path-specification
|
||||||
|
(variable "TESSDATA_PREFIX")
|
||||||
|
(files (list "share/tesseract-ocr/tessdata"))
|
||||||
|
(separator #f)))) ;single value
|
||||||
(home-page "https://github.com/tesseract-ocr/tesseract")
|
(home-page "https://github.com/tesseract-ocr/tesseract")
|
||||||
(synopsis "Optical character recognition engine")
|
(synopsis "Optical character recognition engine")
|
||||||
(description
|
(description
|
||||||
|
@@ -166,7 +191,9 @@ models for the Tesseract OCR Engine.")
|
||||||
high accuracy. It supports many languages, output text formatting, hOCR
|
high accuracy. It supports many languages, output text formatting, hOCR
|
||||||
positional information and page layout analysis. Several image formats are
|
positional information and page layout analysis. Several image formats are
|
||||||
supported through the Leptonica library. It can also detect whether text is
|
supported through the Leptonica library. It can also detect whether text is
|
||||||
monospaced or proportional.")
|
monospaced or proportional. Support for the English language is included by
|
||||||
|
default. To add support for more languages, the
|
||||||
|
@code{tesseract-ocr-tessdata-fast} package should be installed.")
|
||||||
(license license:asl2.0)))
|
(license license:asl2.0)))
|
||||||
|
|
||||||
(define-public gimagereader
|
(define-public gimagereader
|
||||||
|
|
Reference in a new issue