gnu: Add apache-arrow-for-ceph.
* gnu/packages/databases.scm (apache-arrow-for-ceph): New variable.
parent
0a45d4bad4
commit
ad141242f8
|
@ -4310,6 +4310,138 @@ language-bindings for structure manipulation. It also provides IPC and common
|
|||
algorithm implementations.")
|
||||
(license license:asl2.0)))
|
||||
|
||||
;; Variant of Apache Arrow pinned at version 6.0.1 and built from the "cpp"
;; subdirectory of the upstream Git repository.
;; NOTE(review): the variable name suggests this version is kept for Ceph;
;; confirm against the Ceph package definition.
;;
;; The build is split over three outputs:
;;   "lib"     - CMake install prefix (libraries; also used as the RPATH),
;;   "out"     - executables (CMAKE_INSTALL_BINDIR),
;;   "include" - headers (CMAKE_INSTALL_INCLUDEDIR).
(define-public apache-arrow-for-ceph
  (package
    (name "apache-arrow")
    (version "6.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/apache/arrow")
             ;; Upstream release tags have the form "apache-arrow-VERSION".
             (commit (string-append "apache-arrow-" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0mcw361akqw4sxnnpnr9c9v1zk4hphk6gcq763pcb19yzljh88ig"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; The CMake project lives in the "cpp" subdirectory of the
         ;; checkout, so enter it before the 'configure' phase runs.
         (add-before 'configure 'enter-source-directory
           (lambda _ (chdir "cpp")))
         ;; Runs right after 'unpack', i.e. from the top of the checkout,
         ;; which is why the paths below start with "cpp/".
         (add-after 'unpack 'set-env
           (lambda* (#:key inputs #:allow-other-keys)
             ;; CMAKE_INSTALL_INCLUDEDIR is given as an absolute path (see
             ;; the configure flags), so it must not be prefixed with
             ;; ${prefix} in the generated parquet.pc.
             (substitute* "cpp/src/parquet/parquet.pc.in"
               (("includedir=\\$\\{prefix\\}/")
                "includedir="))
             ;; Drop the line that sets xsimd_SOURCE in the third-party
             ;; toolchain file; presumably so that the -Dxsimd_SOURCE=SYSTEM
             ;; flag below is not overridden (NOTE(review): confirm).
             (substitute* "cpp/cmake_modules/ThirdpartyToolchain.cmake"
               (("set\\(xsimd_SOURCE.*") ""))
             ;; Point the build at the store locations of these inputs.
             (setenv "BOOST_ROOT" (assoc-ref inputs "boost"))
             (setenv "BROTLI_HOME" (assoc-ref inputs "brotli"))
             (setenv "FLATBUFFERS_HOME" (assoc-ref inputs "flatbuffers"))
             (setenv "RAPIDJSON_HOME" (assoc-ref inputs "rapidjson")))))
       #:build-type "Release"
       #:configure-flags
       (list "-DARROW_GLOG=ON"
             ;; Parquet options
             "-DARROW_PARQUET=ON"
             "-DPARQUET_BUILD_EXECUTABLES=ON"
             ;; The maintainers disallow using system versions of
             ;; jemalloc:
             ;; https://issues.apache.org/jira/browse/ARROW-3507. This
             ;; is unfortunate because jemalloc increases performance:
             ;; https://arrow.apache.org/blog/2018/07/20/jemalloc/.
             "-DARROW_JEMALLOC=OFF"

             ;; The CMake option ARROW_DEPENDENCY_SOURCE is a global
             ;; option that instructs the build system how to resolve
             ;; each dependency. SYSTEM = Finding the dependency in
             ;; system paths using CMake's built-in find_package
             ;; function, or using pkg-config for packages that do not
             ;; have this feature
             "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
             "-Dxsimd_SOURCE=SYSTEM"

             "-DARROW_RUNTIME_SIMD_LEVEL=NONE"
             "-DARROW_SIMD_LEVEL=NONE"
             "-DARROW_PACKAGE_KIND=Guix"

             ;; Split output into its component packages.
             (string-append "-DCMAKE_INSTALL_PREFIX="
                            (assoc-ref %outputs "lib"))
             (string-append "-DCMAKE_INSTALL_RPATH="
                            (assoc-ref %outputs "lib")
                            "/lib")
             (string-append "-DCMAKE_INSTALL_BINDIR="
                            (assoc-ref %outputs "out")
                            "/bin")
             (string-append "-DCMAKE_INSTALL_INCLUDEDIR="
                            (assoc-ref %outputs "include")
                            "/share/include")

             "-DARROW_WITH_SNAPPY=ON"
             "-DARROW_WITH_ZLIB=ON"
             "-DARROW_WITH_ZSTD=ON"
             "-DARROW_WITH_LZ4=ON"
             "-DARROW_COMPUTE=ON"
             "-DARROW_CSV=ON"
             "-DARROW_DATASET=ON"
             "-DARROW_FILESYSTEM=ON"
             "-DARROW_HDFS=ON"
             "-DARROW_JSON=ON"
             ;; Arrow Python C++ integration library (required for
             ;; building pyarrow). This library must be built against
             ;; the same Python version for which you are building
             ;; pyarrow. NumPy must also be installed. Enabling this
             ;; option also enables ARROW_COMPUTE, ARROW_CSV,
             ;; ARROW_DATASET, ARROW_FILESYSTEM, ARROW_HDFS, and
             ;; ARROW_JSON.
             "-DARROW_PYTHON=ON"

             ;; Building the tests forces on all the
             ;; optional features and the use of static
             ;; libraries.
             "-DARROW_BUILD_TESTS=OFF"
             "-DBENCHMARK_ENABLE_GTEST_TESTS=OFF"
             ;;"-DBENCHMARK_ENABLE_TESTING=OFF"
             "-DARROW_BUILD_STATIC=OFF")))
    (inputs
     (list boost
           brotli
           bzip2
           double-conversion
           gflags
           glog
           grpc
           protobuf
           python
           python-numpy
           rapidjson
           re2
           snappy
           xsimd))
    ;; These are all listed under Requires.private in arrow.pc
    (propagated-inputs
     (list (list apache-thrift "lib")
           lz4
           utf8proc
           zlib
           (list zstd "lib")))
    (native-inputs
     (list pkg-config))
    (outputs '("out" "lib" "include"))
    (home-page "https://arrow.apache.org/")
    (synopsis "Columnar in-memory analytics")
    (description "Apache Arrow is a columnar in-memory analytics layer
designed to accelerate big data. It houses a set of canonical in-memory
representations of flat and hierarchical data along with multiple
language-bindings for structure manipulation. It also provides IPC and common
algorithm implementations.")
    (license license:asl2.0)))
|
||||
|
||||
(define-public apache-arrow-0.16
|
||||
(package
|
||||
(name "apache-arrow")
|
||||
|
|
Reference in New Issue