Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into reproduce-payload-ove…
Browse files Browse the repository at this point in the history
…rflow
  • Loading branch information
zanmato1984 committed Jan 25, 2025
2 parents d4c2af3 + 2c90daf commit 65c4003
Show file tree
Hide file tree
Showing 29 changed files with 772 additions and 342 deletions.
85 changes: 85 additions & 0 deletions .github/workflows/csharp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,88 @@ jobs:
- name: Test
shell: bash
run: ci/scripts/csharp_test.sh $(pwd)

package:
  name: Package
  # Branch or RC tag: run for every non-tag ref, and for tags whose name
  # contains "rc".
  if: github.ref_type != 'tag' || contains(github.ref_name, 'rc')
  runs-on: ubuntu-latest
  timeout-minutes: 15
  permissions:
    # The "Publish" step uploads release assets using github.token.
    contents: write
  steps:
    # Tag builds only: check the repository out into ./arrow so that the
    # dev/release helper scripts are available to the next step.
    - name: Checkout for utilities
      if: github.ref_type == 'tag'
      uses: actions/checkout@v4
      with:
        path: arrow
    # Tag builds only: build from the release tarball attached to the GitHub
    # release for this tag instead of from a Git checkout.
    # NOTE(review): utils-watch-gh-workflow.sh presumably blocks until the
    # release_candidate.yml run for this tag has finished - confirm.
    # NOTE(review): csharp/dummy.git is moved to .git presumably so that
    # tooling expecting a Git work tree keeps working - confirm.
    - name: Download source archive
      if: github.ref_type == 'tag'
      run: |
        arrow/dev/release/utils-watch-gh-workflow.sh \
          ${GITHUB_REF_NAME} \
          release_candidate.yml
        gh release download ${GITHUB_REF_NAME} \
          --pattern "*.tar.gz" \
          --repo ${GITHUB_REPOSITORY}
        tar -xf *.tar.gz --strip-components=1
        mv csharp/dummy.git .git
      env:
        GH_TOKEN: ${{ github.token }}
    # Branch builds only: full history (fetch-depth: 0) is fetched because
    # "Prepare version" runs `git describe --tags`.
    - name: Checkout
      if: github.ref_type != 'tag'
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
    # Branch builds only: derive a version string from `git describe` and
    # write it into csharp/Directory.Build.props.
    #
    # Every numeric component is matched with `[0-9]+`, so multi-digit patch
    # versions (e.g. 20.0.10.dev-9) are converted as well. The <Version>
    # substitution captures and re-emits whatever leading spaces the line
    # has, so it does not depend on the exact indentation of the props file.
    - name: Prepare version
      if: github.ref_type != 'tag'
      run: |
        # apache-arrow-20.0.0.dev-9-g758867f907 ->
        # 20.0.0.dev-9-g758867f907 ->
        # 20.0.0.dev-9 ->
        # 20.0.0-dev-9
        semver="$(git describe --tags | \
                    sed -E \
                      -e 's/^apache-arrow-//' \
                      -e 's/-[^-]*$//' \
                      -e 's/^([0-9]+\.[0-9]+\.[0-9]+)\./\1-/')"
        sed -i'' -E -e \
          "s/^( *)<Version>.+<\/Version>/\1<Version>${semver}<\/Version>/" \
          csharp/Directory.Build.props
    - name: Setup Python
      uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
      with:
        # Quoted so that the version is always passed as a string.
        python-version: "3"
    - name: Setup Archery
      run: |
        python3 -m pip install -e 'dev/archery[docker]'
    - name: Build
      run: |
        archery docker run ubuntu-csharp
    # Copy every .nupkg/.snupkg found under csharp/artifacts into the
    # workspace root and run the checksum helper on each of them.
    # globstar is required for the recursive `**` pattern.
    - name: Prepare artifacts
      run: |
        shopt -s globstar
        cp csharp/artifacts/**/*.{,s}nupkg ./
        for artifact in *.{,s}nupkg; do
          dev/release/utils-generate-checksum.sh "${artifact}"
        done
    # Packages and their checksum files are always kept as a workflow
    # artifact, for branch builds and tag builds alike.
    - name: Upload
      uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
      with:
        name: nuget
        path: |
          *.nupkg
          *.sha256
          *.sha512
          *.snupkg
    # Tag builds only: attach the same files to the GitHub release of the tag.
    - name: Publish
      if: github.ref_type == 'tag'
      run: |
        gh release upload ${GITHUB_REF_NAME} \
          --repo ${GITHUB_REPOSITORY} \
          *.nupkg \
          *.sha256 \
          *.sha512 \
          *.snupkg
      env:
        GH_TOKEN: ${{ github.token }}
22 changes: 12 additions & 10 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ env:

jobs:
publish:
name: Publish
name: Publish
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Get Tag Name of Latest Release Candidate
run: |
rc_tag=$(gh release list --repo apache/arrow | \
rc_tag=$(gh release list --repo ${GITHUB_REPOSITORY} | \
cut -f3 | \
grep -F "${GITHUB_REF_NAME}-rc" | \
head -n1)
Expand All @@ -52,23 +52,25 @@ jobs:
echo "VERSION_WITH_RC=${version_with_rc}" >> ${GITHUB_ENV}
echo "VERSION=${version}" >> ${GITHUB_ENV}
echo "RC_NUM=${rc_num}" >> ${GITHUB_ENV}
- name: Download Release Candidate Artifacts
- name: Download Release Candidate Artifacts
run: |
mkdir release_candidate_artifacts
gh release download ${RELEASE_CANDIDATE_TAG_NAME} --repo apache/arrow --dir release_candidate_artifacts
gh release download ${RELEASE_CANDIDATE_TAG_NAME} \
--dir release_candidate_artifacts \
--repo ${GITHUB_REPOSITORY}
- name: Create Release Title
run: |
title="Apache Arrow ${VERSION}"
echo "RELEASE_TITLE=${title}" >> ${GITHUB_ENV}
# Set the release notes to "TODO" temporarily. After the release notes page
# (https://arrow.apache.org/release/{VERSION}.html) is published, use
# gh release edit to update the release notes to refer to the newly
# (https://arrow.apache.org/release/{VERSION}.html) is published, use
# gh release edit to update the release notes to refer to the newly
# pushed web page. See dev/post/post-05-update-gh-release-notes.sh
- name: Create GitHub Release
run: |
gh release create ${GITHUB_REF_NAME} \
--repo apache/arrow \
--verify-tag \
--title "${RELEASE_TITLE}" \
--notes "TODO" \
release_candidate_artifacts/*
--repo ${GITHUB_REPOSITORY} \
--title "${RELEASE_TITLE}" \
--verify-tag \
release_candidate_artifacts/*
9 changes: 5 additions & 4 deletions .github/workflows/release_candidate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ env:

jobs:
publish:
name: Publish
name: Publish
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
steps:
- name: Checkout Arrow
uses: actions/checkout@v4
with:
Expand All @@ -58,13 +58,14 @@ jobs:
echo "RELEASE_CANDIDATE_NOTES=${release_notes}" >> ${GITHUB_ENV}
- name: Create Release tarball
run: |
cd dev/release/ && ./utils-create-release-tarball.sh ${VERSION} ${RC_NUM}
dev/release/utils-create-release-tarball.sh ${VERSION} ${RC_NUM}
echo "RELEASE_TARBALL=apache-arrow-${VERSION}.tar.gz" >> ${GITHUB_ENV}
dev/release/utils-generate-checksum.sh "apache-arrow-${VERSION}.tar.gz"
- name: Create GitHub Release
run: |
gh release create ${GITHUB_REF_NAME} \
--verify-tag \
--prerelease \
--title "${RELEASE_CANDIDATE_TITLE}" \
--notes "Release Notes: ${RELEASE_CANDIDATE_NOTES}" \
dev/release/${RELEASE_TARBALL}
${RELEASE_TARBALL}*
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,5 @@ repos:
?^ci/scripts/c_glib_build\.sh$|
?^ci/scripts/c_glib_test\.sh$|
?^c_glib/test/run-test\.sh$|
?^dev/release/utils-generate-checksum\.sh$|
)
32 changes: 32 additions & 0 deletions c_glib/arrow-glib/basic-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,38 @@ garrow_array_concatenate(GArrowArray *array, GList *other_arrays, GError **error
}
}

/**
 * garrow_array_validate:
 * @array: A #GArrowArray.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Validates @array by calling `Validate()` on the wrapped Arrow C++ array
 * and reports a failed status through @error.
 *
 * Returns: %TRUE on success, %FALSE on error.
 *
 * Since: 20.0.0
 */
gboolean
garrow_array_validate(GArrowArray *array, GError **error)
{
  // Run the check on the underlying C++ array, then translate the resulting
  // status into the gboolean/GError convention.
  const auto status = garrow_array_get_raw(array)->Validate();
  return garrow::check(error, status, "[array][validate]");
}

/**
 * garrow_array_validate_full:
 * @array: A #GArrowArray.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Validates @array by calling `ValidateFull()` on the wrapped Arrow C++
 * array and reports a failed status through @error.
 *
 * Returns: %TRUE on success, %FALSE on error.
 *
 * Since: 20.0.0
 */
gboolean
garrow_array_validate_full(GArrowArray *array, GError **error)
{
  // Run the check on the underlying C++ array, then translate the resulting
  // status into the gboolean/GError convention.
  const auto status = garrow_array_get_raw(array)->ValidateFull();
  return garrow::check(error, status, "[array][validate_full]");
}

G_DEFINE_TYPE(GArrowNullArray, garrow_null_array, GARROW_TYPE_ARRAY)

static void
Expand Down
8 changes: 8 additions & 0 deletions c_glib/arrow-glib/basic-array.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ GARROW_AVAILABLE_IN_4_0
GArrowArray *
garrow_array_concatenate(GArrowArray *array, GList *other_arrays, GError **error);

GARROW_AVAILABLE_IN_20_0
gboolean
garrow_array_validate(GArrowArray *array, GError **error);

GARROW_AVAILABLE_IN_20_0
gboolean
garrow_array_validate_full(GArrowArray *array, GError **error);

#define GARROW_TYPE_NULL_ARRAY (garrow_null_array_get_type())
GARROW_AVAILABLE_IN_ALL
G_DECLARE_DERIVABLE_TYPE(
Expand Down
43 changes: 43 additions & 0 deletions c_glib/test/test-array.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,4 +185,47 @@ def test_mixed_type
end
end
end

sub_test_case("#validate") do
  # A well-formed array: #validate must return a truthy value.
  def test_valid
    well_formed = build_int32_array([1, 2, 3, 4, 5])
    assert do
      well_formed.validate
    end
  end

  # An array constructed with a negative length: #validate must raise
  # Arrow::Error::Invalid with the message produced for that case.
  def test_invalid
    expected = "[array][validate]: Invalid: Array length is negative"
    broken = Arrow::Int8Array.new(-1,
                                  Arrow::Buffer.new(""),
                                  Arrow::Buffer.new(""),
                                  -1)
    assert_raise(Arrow::Error::Invalid.new(expected)) do
      broken.validate
    end
  end
end

sub_test_case("#validate_full") do
  # A well-formed array: #validate_full must return a truthy value.
  def test_valid
    well_formed = build_int32_array([1, 2, 3, 4, 5])
    assert do
      well_formed.validate_full
    end
  end

  # A string array whose only value is not valid UTF-8: #validate_full must
  # raise Arrow::Error::Invalid.
  def test_invalid
    expected = "[array][validate_full]: Invalid: Invalid UTF8 sequence at string index 0"

    # U+3042 HIRAGANA LETTER A, U+3044 HIRAGANA LETTER I
    # Dropping the final byte leaves the second character incomplete.
    truncated = "\u3042\u3044".b[0..-2]
    offsets = Arrow::Buffer.new([0, truncated.size].pack("l*"))
    values = Arrow::Buffer.new(truncated)
    bitmap = Arrow::Buffer.new([0b01].pack("C*"))
    broken = Arrow::StringArray.new(1, offsets, values, bitmap, -1)

    assert_raise(Arrow::Error::Invalid.new(expected)) do
      broken.validate_full
    end
  end
end
end
6 changes: 4 additions & 2 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -988,9 +988,11 @@ endif()

# Enable s/ccache if set by parent.
if(CMAKE_C_COMPILER_LAUNCHER AND CMAKE_CXX_COMPILER_LAUNCHER)
file(TO_CMAKE_PATH "${CMAKE_C_COMPILER_LAUNCHER}" EP_CMAKE_C_COMPILER_LAUNCHER)
file(TO_CMAKE_PATH "${CMAKE_CXX_COMPILER_LAUNCHER}" EP_CMAKE_CXX_COMPILER_LAUNCHER)
list(APPEND EP_COMMON_CMAKE_ARGS
-DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER})
-DCMAKE_C_COMPILER_LAUNCHER=${EP_CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_CXX_COMPILER_LAUNCHER=${EP_CMAKE_CXX_COMPILER_LAUNCHER})
endif()

if(NOT ARROW_VERBOSE_THIRDPARTY_BUILD)
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ using compute::DictionaryEncodeOptions;
using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RankQuantileOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -151,6 +152,9 @@ static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
DataMember("sort_keys", &RankOptions::sort_keys),
DataMember("null_placement", &RankOptions::null_placement),
DataMember("tiebreaker", &RankOptions::tiebreaker));
// Options type for RankQuantileOptions, built from its two data members
// ("sort_keys" and "null_placement") registered under those names.
static auto kRankQuantileOptionsType = GetFunctionOptionsType<RankQuantileOptions>(
DataMember("sort_keys", &RankQuantileOptions::sort_keys),
DataMember("null_placement", &RankQuantileOptions::null_placement));
static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
DataMember("periods", &PairwiseOptions::periods));
static auto kListFlattenOptionsType = GetFunctionOptionsType<ListFlattenOptions>(
Expand Down Expand Up @@ -228,6 +232,13 @@ RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_plac
tiebreaker(tiebreaker) {}
constexpr char RankOptions::kTypeName[];

// Constructs the options with internal::kRankQuantileOptionsType as their
// FunctionOptions type; `sort_keys` is moved into the member and
// `null_placement` is stored as given.
RankQuantileOptions::RankQuantileOptions(std::vector<SortKey> sort_keys,
NullPlacement null_placement)
: FunctionOptions(internal::kRankQuantileOptionsType),
sort_keys(std::move(sort_keys)),
null_placement(null_placement) {}
// Namespace-scope definition of the static constexpr kTypeName member,
// following the same pattern as the neighbouring options classes.
constexpr char RankQuantileOptions::kTypeName[];

PairwiseOptions::PairwiseOptions(int64_t periods)
: FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {}
constexpr char PairwiseOptions::kTypeName[];
Expand Down
19 changes: 19 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,25 @@ class ARROW_EXPORT RankOptions : public FunctionOptions {
Tiebreaker tiebreaker;
};

/// \brief Quantile rank options
///
/// Holds the sort keys and the null placement.
/// NOTE(review): presumably consumed by a quantile-rank vector function;
/// the function itself is not visible here - confirm.
class ARROW_EXPORT RankQuantileOptions : public FunctionOptions {
public:
/// \brief Construct from explicit sort keys and a null placement
///
/// Both arguments are optional: by default there are no sort keys and
/// nulls are placed at the end.
explicit RankQuantileOptions(std::vector<SortKey> sort_keys = {},
NullPlacement null_placement = NullPlacement::AtEnd);
/// Convenience constructor for array inputs
///
/// Delegates to the main constructor with a single SortKey that has an
/// empty name and the given order.
explicit RankQuantileOptions(SortOrder order,
NullPlacement null_placement = NullPlacement::AtEnd)
: RankQuantileOptions({SortKey("", order)}, null_placement) {}

/// Name identifying this options type.
static constexpr char const kTypeName[] = "RankQuantileOptions";
/// Return default-constructed options (no sort keys, nulls at the end).
static RankQuantileOptions Defaults() { return RankQuantileOptions(); }

/// Column key(s) to order by and how to order by these sort keys.
std::vector<SortKey> sort_keys;
/// Whether nulls and NaNs are placed at the start or at the end
NullPlacement null_placement;
};

/// \brief Partitioning options for NthToIndices
class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
public:
Expand Down
Loading

0 comments on commit 65c4003

Please sign in to comment.