Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/CurtHagenlocher/arrow into …
Browse files Browse the repository at this point in the history
…dev/curth/SqlDecimal128
  • Loading branch information
CurtHagenlocher committed Oct 27, 2023
2 parents 4d8abd9 + 547b240 commit a7f46a7
Show file tree
Hide file tree
Showing 98 changed files with 2,994 additions and 491 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ ULIMIT_CORE=-1
ALMALINUX=8
ALPINE_LINUX=3.16
DEBIAN=11
FEDORA=35
FEDORA=38
UBUNTU=20.04

# Default versions for various dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

ARG arch
FROM ${arch}/fedora:35
FROM ${arch}/fedora:38
ARG arch

# install dependencies
Expand Down Expand Up @@ -46,9 +46,9 @@ RUN dnf update -y && \
java-latest-openjdk-devel \
java-latest-openjdk-headless \
json-devel \
liborc-devel \
libzstd-devel \
llvm-devel \
llvm-static \
lz4-devel \
make \
ninja-build \
Expand All @@ -64,6 +64,7 @@ RUN dnf update -y && \
utf8proc-devel \
wget \
which \
xsimd-devel \
zlib-devel

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
Expand Down Expand Up @@ -100,8 +101,6 @@ ENV absl_SOURCE=BUNDLED \
CC=gcc \
CXX=g++ \
google_cloud_cpp_storage_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
PATH=/usr/lib/ccache/:$PATH \
xsimd_SOURCE=BUNDLED
PATH=/usr/lib/ccache/:$PATH
6 changes: 3 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -770,10 +770,10 @@ if(ARROW_WITH_ZSTD)
endif()

if(ARROW_ORC)
list(APPEND ARROW_SHARED_LINK_LIBS orc::liborc ${ARROW_PROTOBUF_LIBPROTOBUF})
list(APPEND ARROW_STATIC_LINK_LIBS orc::liborc ${ARROW_PROTOBUF_LIBPROTOBUF})
list(APPEND ARROW_SHARED_LINK_LIBS orc::orc ${ARROW_PROTOBUF_LIBPROTOBUF})
list(APPEND ARROW_STATIC_LINK_LIBS orc::orc ${ARROW_PROTOBUF_LIBPROTOBUF})
if(ORC_SOURCE STREQUAL "SYSTEM")
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::liborc
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc
${ARROW_PROTOBUF_LIBPROTOBUF})
endif()
endif()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,20 @@
# specific language governing permissions and limitations
# under the License.

# - Find Apache ORC C++ (orc/orc-config.h, liborc.a)
# This module defines
# ORC_INCLUDE_DIR, directory containing headers
# ORC_STATIC_LIB, path to liborc.a
# ORC_FOUND, whether orc has been found
if(orcAlt_FOUND)
return()
endif()

if(ORC_FOUND)
set(find_package_args)
if(orcAlt_FIND_VERSION)
list(APPEND find_package_args ${orcAlt_FIND_VERSION})
endif()
if(orcAlt_FIND_QUIETLY)
list(APPEND find_package_args QUIET)
endif()
find_package(orc ${find_package_args})
if(orc_FOUND)
set(orcAlt_FOUND TRUE)
return()
endif()

Expand All @@ -45,15 +52,13 @@ else()
PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
endif()

if(ORC_STATIC_LIB AND ORC_INCLUDE_DIR)
set(ORC_FOUND TRUE)
add_library(orc::liborc STATIC IMPORTED)
set_target_properties(orc::liborc
PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}")
else()
if(ORC_FIND_REQUIRED)
message(FATAL_ERROR "ORC library was required in toolchain and unable to locate")
find_package_handle_standard_args(orcAlt REQUIRED_VARS ORC_STATIC_LIB ORC_INCLUDE_DIR)

if(orcAlt_FOUND)
if(NOT TARGET orc::orc)
add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc
PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}")
endif()
set(ORC_FOUND FALSE)
endif()
34 changes: 21 additions & 13 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
lz4
nlohmann_json
opentelemetry-cpp
ORC
orc
re2
Protobuf
RapidJSON
Expand Down Expand Up @@ -94,6 +94,14 @@ if("${re2_SOURCE}" STREQUAL "" AND NOT "${RE2_SOURCE}" STREQUAL "")
set(re2_SOURCE ${RE2_SOURCE})
endif()

# Backward-compatibility shim: honor the legacy "ORC_SOURCE" spelling when
# it is set and "orc_SOURCE" is empty or unset.
# The dependency name changed from "ORC" to "orc" in 15.0.0 because
# upstream uses "orc", not "ORC", as its package name.
if(NOT "${ORC_SOURCE}" STREQUAL "" AND "${orc_SOURCE}" STREQUAL "")
  set(orc_SOURCE ${ORC_SOURCE})
endif()

# For backward compatibility. We use "RE2_ROOT" if "re2_ROOT"
# isn't specified and "RE2_ROOT" is specified.
if("${re2_ROOT}" STREQUAL "" AND NOT "${RE2_ROOT}" STREQUAL "")
Expand Down Expand Up @@ -193,7 +201,7 @@ macro(build_dependency DEPENDENCY_NAME)
build_nlohmann_json()
elseif("${DEPENDENCY_NAME}" STREQUAL "opentelemetry-cpp")
build_opentelemetry()
elseif("${DEPENDENCY_NAME}" STREQUAL "ORC")
elseif("${DEPENDENCY_NAME}" STREQUAL "orc")
build_orc()
elseif("${DEPENDENCY_NAME}" STREQUAL "Protobuf")
build_protobuf()
Expand Down Expand Up @@ -4423,31 +4431,31 @@ macro(build_orc)

set(ORC_VENDORED 1)

add_library(orc::liborc STATIC IMPORTED)
set_target_properties(orc::liborc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
target_include_directories(orc::liborc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
set(ORC_LINK_LIBRARIES LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} ${Snappy_TARGET})
add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET})
# Protobuf generated files may use ABSL_DCHECK*() and
# absl::log_internal_check_op is needed for them.
if(TARGET absl::log_internal_check_op)
list(APPEND ORC_LINK_LIBRARIES absl::log_internal_check_op)
target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
endif()
if(NOT MSVC)
if(NOT APPLE AND ARROW_ENABLE_THREADING)
list(APPEND ORC_LINK_LIBRARIES Threads::Threads)
target_link_libraries(orc::orc INTERFACE Threads::Threads)
endif()
list(APPEND ORC_LINK_LIBRARIES ${CMAKE_DL_LIBS})
target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
endif()
target_link_libraries(orc::liborc INTERFACE ${ORC_LINK_LIBRARIES})

add_dependencies(toolchain orc_ep)
add_dependencies(orc::liborc orc_ep)
add_dependencies(orc::orc orc_ep)

list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::liborc)
list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
endmacro()

if(ARROW_ORC)
resolve_dependency(ORC)
resolve_dependency(orc HAVE_ALT TRUE)
message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
endif()
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/adapters/orc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ else()
set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_shared arrow_shared)
endif()

set(ORC_STATIC_TEST_LINK_LIBS orc::liborc ${ARROW_LIBRARIES_FOR_STATIC_TESTS}
set(ORC_STATIC_TEST_LINK_LIBS orc::orc ${ARROW_LIBRARIES_FOR_STATIC_TESTS}
${ARROW_GTEST_GTEST_MAIN} ${ARROW_GTEST_GTEST})

add_arrow_test(adapter_test
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ struct ScalarFromArraySlotImpl {
return Finish(a.GetString(index_));
}

// BinaryView slot: copy the element at index_ into a std::string and pass it
// on to Finish().
Status Visit(const BinaryViewArray& a) {
  return Finish(a.GetString(index_));
}

// FixedSizeBinary slot: fetch the element at index_ through GetString() and
// pass the result on to Finish().
Status Visit(const FixedSizeBinaryArray& a) {
  return Finish(a.GetString(index_));
}

// DayTimeInterval slot: read the element at index_ through Value() and pass
// it on to Finish().
Status Visit(const DayTimeIntervalArray& a) {
  return Finish(a.Value(index_));
}
Expand Down
28 changes: 28 additions & 0 deletions cpp/src/arrow/array/array_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "arrow/array/validate.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/binary_view_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"

Expand Down Expand Up @@ -89,6 +90,33 @@ LargeStringArray::LargeStringArray(int64_t length,

// Delegates UTF-8 validation of this array's ArrayData to
// internal::ValidateUTF8 and returns its Status unchanged.
Status LargeStringArray::ValidateUTF8() const {
  return internal::ValidateUTF8(*data_);
}

// Wraps existing ArrayData as a BinaryViewArray.
//
// The type id is checked with ARROW_CHECK_EQ before `data` is adopted via
// SetData(); `data` is dereferenced by that check, so it must be non-null
// and carry a type.
// NOTE(review): ARROW_CHECK_* presumably aborts on mismatch rather than
// returning a Status - confirm against arrow/util/logging.h.
BinaryViewArray::BinaryViewArray(std::shared_ptr<ArrayData> data) {
ARROW_CHECK_EQ(data->type->id(), Type::BINARY_VIEW);
SetData(std::move(data));
}

// Builds the array from its individual buffers.
//
// The ArrayData buffer list is assembled as
//   [null_bitmap, views, <incoming buffers in their original order>...]
// and then adopted through SetData().
BinaryViewArray::BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
                                 std::shared_ptr<Buffer> views, BufferVector buffers,
                                 std::shared_ptr<Buffer> null_bitmap, int64_t null_count,
                                 int64_t offset) {
  // Put `views` at the front, then place `null_bitmap` directly before it;
  // insert() returns an iterator to the element it just inserted.
  auto views_pos = buffers.insert(buffers.begin(), std::move(views));
  buffers.insert(views_pos, std::move(null_bitmap));

  auto array_data =
      ArrayData::Make(std::move(type), length, std::move(buffers), null_count, offset);
  SetData(std::move(array_data));
}

// Returns the bytes of element `i` as a string_view.
//
// The view struct at raw_values_[i] is resolved by util::FromBinaryView,
// which is handed a pointer to the buffers that follow the first two
// entries of data_->buffers (index 0 and 1 are skipped here).
// No bounds or null checking is performed on `i` in this function.
std::string_view BinaryViewArray::GetView(int64_t i) const {
  const auto& view = raw_values_[i];
  const std::shared_ptr<Buffer>* trailing_buffers = data_->buffers.data() + 2;
  return util::FromBinaryView(view, trailing_buffers);
}

// Wraps existing ArrayData as a StringViewArray.
//
// The type id is checked with ARROW_CHECK_EQ before `data` is adopted via
// SetData(); `data` is dereferenced by that check, so it must be non-null
// and carry a type.
// NOTE(review): no UTF-8 validation happens here; call ValidateUTF8() for
// that.
StringViewArray::StringViewArray(std::shared_ptr<ArrayData> data) {
ARROW_CHECK_EQ(data->type->id(), Type::STRING_VIEW);
SetData(std::move(data));
}

// Delegates UTF-8 validation of this array's ArrayData to
// internal::ValidateUTF8 and returns its Status unchanged.
Status StringViewArray::ValidateUTF8() const {
  return internal::ValidateUTF8(*data_);
}

// Wraps existing ArrayData as a FixedSizeBinaryArray by forwarding it to
// SetData(). No type check is performed in this constructor itself.
FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data) {
SetData(data);
}
Expand Down
60 changes: 60 additions & 0 deletions cpp/src/arrow/array/array_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
Expand Down Expand Up @@ -217,6 +218,65 @@ class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
Status ValidateUTF8() const;
};

// ----------------------------------------------------------------------
// BinaryView and StringView

/// Concrete Array class for variable-size binary view data using the
/// BinaryViewType::c_type struct to reference in-line or out-of-line string values
class ARROW_EXPORT BinaryViewArray : public FlatArray {
 public:
  using TypeClass = BinaryViewType;
  using IteratorType = stl::ArrayIterator<BinaryViewArray>;
  using c_type = BinaryViewType::c_type;

  /// \brief Wrap existing ArrayData
  explicit BinaryViewArray(std::shared_ptr<ArrayData> data);

  /// \brief Construct from individual buffers
  ///
  /// \param[in] type the array's data type
  /// \param[in] length number of elements
  /// \param[in] views buffer of c_type view structs (exposed by values())
  /// \param[in] data_buffers buffers placed after the views buffer
  /// \param[in] null_bitmap optional validity bitmap
  /// \param[in] null_count number of nulls, or kUnknownNullCount
  /// \param[in] offset logical offset into the buffers
  BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
                  std::shared_ptr<Buffer> views, BufferVector data_buffers,
                  std::shared_ptr<Buffer> null_bitmap = NULLPTR,
                  int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  // For API compatibility with BinaryArray etc.
  /// \brief Return the bytes of element i as a string_view
  std::string_view GetView(int64_t i) const;
  /// \brief Return an owned copy of element i
  std::string GetString(int64_t i) const { return std::string{GetView(i)}; }

  /// \brief The buffer holding the view structs (buffer index 1)
  const auto& values() const { return data_->buffers[1]; }
  /// \brief Pointer to the view structs as cached by SetData()
  const c_type* raw_values() const { return raw_values_; }

  /// \brief Element access through the STL-style iterator
  ///
  /// NOTE(review): presumably yields std::nullopt for null slots; that is
  /// decided by stl::ArrayIterator - confirm there.
  std::optional<std::string_view> operator[](int64_t i) const {
    return *IteratorType(*this, i);
  }

  IteratorType begin() const { return IteratorType(*this); }
  IteratorType end() const { return IteratorType(*this, length()); }

 protected:
  using FlatArray::FlatArray;

  /// \brief Adopt `data` and refresh the cached pointer to the view structs
  void SetData(std::shared_ptr<ArrayData> data) {
    FlatArray::SetData(std::move(data));
    raw_values_ = data_->GetValuesSafe<c_type>(1);
  }

  // Default-initialized so that an object created through the inherited
  // FlatArray constructors (without a SetData() call) never exposes an
  // indeterminate pointer via raw_values() / GetView().
  const c_type* raw_values_ = NULLPTR;
};

/// Concrete Array class for variable-size string view (utf-8) data.
///
/// Values are referenced, in-line or out-of-line, through
/// BinaryViewType::c_type exactly as in BinaryViewArray.
class ARROW_EXPORT StringViewArray : public BinaryViewArray {
 public:
  using TypeClass = StringViewType;

  // Re-use the constructors declared by BinaryViewArray.
  using BinaryViewArray::BinaryViewArray;

  /// \brief Wrap existing ArrayData
  explicit StringViewArray(std::shared_ptr<ArrayData> data);

  /// \brief Check that every entry of this array is valid UTF8
  ///
  /// ValidateFull() implies this check as well.
  Status ValidateUTF8() const;
};

// ----------------------------------------------------------------------
// Fixed width binary

Expand Down
Loading

0 comments on commit a7f46a7

Please sign in to comment.