me
/
guix
Archived
1
0
Fork 0

gnu: tesseract-ocr: Update to 5.2.0.

* gnu/packages/ocr.scm (tesseract-ocr): Update to 5.2.0.
[inputs, native-inputs]: Move after arguments.  Use new style inputs.
[arguments]: Use gexps.
[configure-flags]: Add --disable-static.
[phases]{fix-docbook}: Replace phase with...
{do-not-override-xml-catalog-files}: ... this new phase.
{build-training}: Move after build phase.  Enable parallel build.
{install-training}: Move after install phase.
[native-inputs]: Add libxml2.
Maxim Cournoyer 2022-08-11 17:53:21 -04:00
parent 887dbf4d80
commit f7c027617d
No known key found for this signature in database
GPG Key ID: 1260E46482E63562
1 changed file with 63 additions and 61 deletions

View File

@ -5,6 +5,7 @@
;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
;;; Copyright © 2021 Andy Tai <atai@atai.org>
;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr>
;;; Copyright © 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;;;
;;; This file is part of GNU Guix.
;;;
@ -74,71 +75,72 @@ it produces text in 8-bit or UTF-8 formats.")
(license license:gpl3+)))
;; Guix package definition for the Tesseract OCR engine.
;; NOTE(review): this span looks like a rendered diff hunk with its +/- markers
;; stripped, so two variants of the definition appear back to back: first one
;; pinned to a git commit (version derived from "4.1.1" via git-version), then
;; one that uses the version string "5.2.0" as the git reference.  It is not
;; loadable as a single form as shown here -- confirm against the real file.
(define-public tesseract-ocr
;; There are useful commits beyond the last official stable release.
(let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df")
(revision "1"))
(package
(name "tesseract-ocr")
(version (git-version "4.1.1" revision commit))
(source
(origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/tesseract-ocr/tesseract")
(commit commit)))
(file-name (git-file-name name version))
(sha256
(base32
"11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
(build-system gnu-build-system)
(inputs
`(("cairo" ,cairo)
("icu" ,icu4c)
("leptonica" ,leptonica)
("pango" ,pango)
("python-wrapper" ,python-wrapper)))
(native-inputs
`(("asciidoc" ,asciidoc)
("autoconf" ,autoconf)
("automake" ,automake)
("docbook-xsl" ,docbook-xsl)
("libarchive" ,libarchive)
("libcurl" ,curl)
("libtool" ,libtool)
("libtiff" ,libtiff)
("pkg-config" ,pkg-config)
("xsltproc" ,libxslt)))
(arguments
;; Hand configure the include directory of the "leptonica" input through
;; the LIBLEPT_HEADERSDIR variable.
`(#:configure-flags
(let ((leptonica (assoc-ref %build-inputs "leptonica")))
(list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))
#:tests? #f ; Tests currently result in a segfault
#:phases
(modify-phases %standard-phases
;; Rewrite the docbook.xsl URL in doc/Makefile.am to the manpages
;; stylesheet path under the "docbook-xsl" input; the version component
;; of that path is taken from the docbook-xsl package.
(add-after 'unpack 'fix-docbook
(lambda* (#:key inputs #:allow-other-keys)
;; Don't attempt to download XSL schema.
(substitute* "doc/Makefile.am"
(("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
(string-append (assoc-ref inputs "docbook-xsl")
"/xml/xsl/docbook-xsl-"
,(package-version docbook-xsl)
"/manpages/docbook.xsl")))))
;; Run `make training' once the install phase has completed.
(add-after 'install 'build-training
(lambda _
(invoke "make" "training")))
;; Run `make training-install' right after the build-training phase.
(add-after 'build-training 'install-training
(lambda _
(invoke "make" "training-install"))))))
(home-page "https://github.com/tesseract-ocr/tesseract")
(synopsis "Optical character recognition engine")
(description
"Tesseract is an optical character recognition (OCR) engine with very
(package
(name "tesseract-ocr")
(version "5.2.0")
(source
(origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/tesseract-ocr/tesseract")
(commit version)))
(file-name (git-file-name name version))
(sha256
(base32
"0dai539h07lqj8lyhznd3wbwdpqr78qrsczq78rsmsryqvmdbyaa"))))
(build-system gnu-build-system)
(arguments
(list
#:configure-flags
#~(list (string-append "LIBLEPT_HEADERSDIR="
#$(this-package-input "leptonica") "/include")
"--disable-static") ;avoid 6 MiB static archive
;; The unit tests are disabled because they require building bundled
;; third party libraries.
#:tests? #f
#:phases
#~(modify-phases %standard-phases
(add-after 'unpack 'do-not-override-xml-catalog-files
(lambda _
(substitute* "configure.ac"
(("AC_SUBST\\(\\[XML_CATALOG_FILES])")
""))))
(add-after 'build 'build-training
(lambda* (#:key parallel-build? #:allow-other-keys)
(define n (if parallel-build? (number->string
(parallel-job-count))
"1"))
(invoke "make" "-j" n "training")))
(add-after 'install 'install-training
(lambda _
(invoke "make" "training-install"))))))
(native-inputs
(list asciidoc
autoconf
automake
curl
docbook-xsl
libarchive
libtiff
libtool
libxml2 ;for XML_CATALOG_FILES
libxslt
pkg-config))
(inputs
(list cairo
icu4c
leptonica
pango
python-wrapper))
(home-page "https://github.com/tesseract-ocr/tesseract")
(synopsis "Optical character recognition engine")
(description
"Tesseract is an optical character recognition (OCR) engine with very
high accuracy. It supports many languages, output text formatting, hOCR
positional information and page layout analysis. Several image formats are
supported through the Leptonica library. It can also detect whether text is
monospaced or proportional.")
(license license:asl2.0))))
(license license:asl2.0)))
(define-public gimagereader
(package