summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ricardo Wurmus <rekado@elephly.net> 2022-06-16 15:13:29 +0200
committer Ricardo Wurmus <rekado@elephly.net> 2022-06-16 15:20:59 +0200
commit c8727617e51c335090daa48a63c61801c41a83ae (patch)
tree aaa2e0bab121ae587874a92648f70aeecf767201
parent 59ec9e3f3eb60a53cde5b6b90094927e11e62d8a (diff)
gnu: Add apache-arrow-0.16.
* gnu/packages/databases.scm (apache-arrow-0.16): New variable.
-rw-r--r-- gnu/packages/databases.scm 126
1 files changed, 126 insertions, 0 deletions
diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index 9a142da7ee..03ea55c6e1 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -4258,6 +4258,132 @@ language-bindings for structure manipulation. It also provides IPC and common
algorithm implementations.")
(license license:asl2.0)))
+(define-public apache-arrow-0.16
+ (package
+ (name "apache-arrow")
+ (version "0.16.0")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/apache/arrow")
+ (commit (string-append "apache-arrow-" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "15bplqy5708bxy1mynzjkd3d2g8v2wd36z8l0ap8yyyq54l3gdvy"))))
+ (build-system cmake-build-system)
+ (arguments
+ `(#:tests? #f ; tests are not run; ARROW_BUILD_TESTS is OFF below
+ #:phases
+ (modify-phases %standard-phases
+ (add-before 'configure 'enter-source-directory
+ (lambda _
+ (chdir "cpp") ; configure and build from the cpp/ subdirectory
+ ;; Strip every mention of parquet_constants.cpp from the Parquet
+ ;; CMake file (presumably the packaged Thrift does not emit it).
+ (substitute* "src/parquet/CMakeLists.txt"
+ ((" parquet_constants.cpp") "")
+ (("set\\(THRIFT_OUTPUT_FILES \\$\\{THRIFT_OUTPUT_FILES\\}.*") "")
+ ((".*\"\\$\\{THRIFT_OUTPUT_DIR\\}/parquet_constants.cpp\"\\).*") ""))))
+ (add-after 'unpack 'set-env ; export the store paths of these inputs
+ (lambda _
+ (setenv "BOOST_ROOT" (assoc-ref %build-inputs "boost"))
+ (setenv "BROTLI_HOME" (assoc-ref %build-inputs "brotli"))
+ (setenv "FLATBUFFERS_HOME" (assoc-ref %build-inputs "flatbuffers"))
+ (setenv "RAPIDJSON_HOME" (assoc-ref %build-inputs "rapidjson")))))
+ #:build-type "Release"
+ #:configure-flags
+ (list "-DARROW_GLOG=ON"
+ "-DARROW_SSE42=OFF"
+ "-DARROW_BOOST_USE_SHARED=ON"
+ ;; Parquet options
+ "-DARROW_PARQUET=ON"
+
+ ;; The maintainers disallow using system versions of
+ ;; jemalloc:
+ ;; https://issues.apache.org/jira/browse/ARROW-3507. This
+ ;; is unfortunate because jemalloc increases performance:
+ ;; https://arrow.apache.org/blog/2018/07/20/jemalloc/.
+ "-DARROW_JEMALLOC=OFF"
+
+ ;; The CMake option ARROW_DEPENDENCY_SOURCE is a global
+ ;; option that instructs the build system how to resolve
+ ;; each dependency. SYSTEM = Finding the dependency in
+ ;; system paths using CMake's built-in find_package
+ ;; function, or using pkg-config for packages that do not
+ ;; have this feature.
+ "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
+
+ ;; Split output into its component packages.
+ (string-append "-DCMAKE_INSTALL_PREFIX="
+ (assoc-ref %outputs "out"))
+ (string-append "-DCMAKE_INSTALL_RPATH="
+ (assoc-ref %outputs "out")
+ "/lib")
+ (string-append "-DCMAKE_INSTALL_BINDIR="
+ (assoc-ref %outputs "out")
+ "/bin")
+ (string-append "-DCMAKE_INSTALL_INCLUDEDIR="
+ (assoc-ref %outputs "include")
+ "/share/include")
+
+ "-DARROW_WITH_SNAPPY=ON"
+ "-DARROW_WITH_ZLIB=ON"
+ "-DARROW_WITH_ZSTD=ON"
+ "-DARROW_WITH_LZ4=ON"
+ "-DARROW_COMPUTE=ON"
+ "-DARROW_CSV=ON"
+ "-DARROW_DATASET=ON"
+ "-DARROW_FILESYSTEM=ON"
+ "-DARROW_HDFS=ON"
+ "-DARROW_JSON=ON"
+ ;; Arrow Python C++ integration library (required for
+ ;; building pyarrow). This library must be built against
+ ;; the same Python version for which you are building
+ ;; pyarrow. NumPy must also be installed. Enabling this
+ ;; option also enables ARROW_COMPUTE, ARROW_CSV,
+ ;; ARROW_DATASET, ARROW_FILESYSTEM, ARROW_HDFS, and
+ ;; ARROW_JSON.
+ "-DARROW_PYTHON=ON"
+
+ ;; Building the tests forces on all the
+ ;; optional features and the use of static
+ ;; libraries.
+ "-DARROW_BUILD_TESTS=OFF"
+ "-DBENCHMARK_ENABLE_GTEST_TESTS=OFF"
+ ;;"-DBENCHMARK_ENABLE_TESTING=OFF"
+ "-DARROW_BUILD_STATIC=OFF")))
+ (inputs
+ `(("boost" ,boost)
+ ("brotli" ,google-brotli)
+ ("double-conversion" ,double-conversion)
+ ("snappy" ,snappy)
+ ("gflags" ,gflags)
+ ("glog" ,glog)
+ ("apache-thrift" ,apache-thrift "lib")
+ ("protobuf" ,protobuf)
+ ("rapidjson" ,rapidjson)
+ ("zlib" ,zlib)
+ ("bzip2" ,bzip2)
+ ("lz4" ,lz4)
+ ("zstd" ,zstd "lib")
+ ("re2" ,re2)
+ ("grpc" ,grpc)
+ ("python-3" ,python)
+ ("python-numpy" ,python-numpy)))
+ (native-inputs
+ (list pkg-config apache-thrift))
+ (outputs '("out" "include"))
+ (home-page "https://arrow.apache.org/")
+ (synopsis "Columnar in-memory analytics")
+ (description "Apache Arrow is a columnar in-memory analytics layer
+designed to accelerate big data. It houses a set of canonical in-memory
+representations of flat and hierarchical data along with multiple
+language-bindings for structure manipulation. It also provides IPC and common
+algorithm implementations.")
+ (license license:asl2.0)))
+
(define-public python-pyarrow
(package
(inherit apache-arrow)