From 1565723088a33cd990e1e74750727523996deaec Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Fri, 27 Dec 2024 13:35:07 +0800 Subject: [PATCH] bytes_status_ -> bytes_status_in_block_ --- cpp/src/arrow/compute/key_map_internal.cc | 32 ++++++++++--------- cpp/src/arrow/compute/key_map_internal.h | 5 ++- .../arrow/compute/key_map_internal_avx2.cc | 3 +- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/compute/key_map_internal.cc b/cpp/src/arrow/compute/key_map_internal.cc index 1f752be85d35c..2d96b3b34e617 100644 --- a/cpp/src/arrow/compute/key_map_internal.cc +++ b/cpp/src/arrow/compute/key_map_internal.cc @@ -101,7 +101,7 @@ void SwissTable::extract_group_ids_imp(const int num_keys, const uint16_t* selec if (log_blocks_ == 0) { for (int i = 0; i < num_keys; ++i) { uint32_t id = use_selection ? selection[i] : i; - uint32_t group_id = blocks()[bytes_status_ + local_slots[id]]; + uint32_t group_id = blocks()[bytes_status_in_block_ + local_slots[id]]; out_group_ids[id] = group_id; } } else { @@ -111,7 +111,7 @@ void SwissTable::extract_group_ids_imp(const int num_keys, const uint16_t* selec : num_groupid_bytes == 2 ? 0xFFFF : 0xFFFFFFFF; int64_t num_block_bytes = num_block_bytes_from_num_groupid_bits(num_groupid_bits); - const uint8_t* slots_base = blocks_->data() + bytes_status_; + const uint8_t* slots_base = blocks_->data() + bytes_status_in_block_; for (int i = 0; i < num_keys; ++i) { uint32_t id = use_selection ? selection[i] : i; @@ -494,7 +494,7 @@ Status SwissTable::map_new_keys_helper( // ARROW_DCHECK(*inout_num_selected <= static_cast(1 << log_minibatch_)); - size_t num_bytes_for_bits = (*inout_num_selected + 7) / 8 + bytes_status_; + size_t num_bytes_for_bits = (*inout_num_selected + 7) / 8 + bytes_status_in_block_; auto match_bitvector_buf = util::TempVectorHolder( temp_stack, static_cast(num_bytes_for_bits)); uint8_t* match_bitvector = match_bitvector_buf.mutable_data(); @@ -673,18 +673,19 @@ Status SwissTable::grow_double() { int ihalf = block_id_new & 1; uint8_t stamp_new = (hash >> bits_shift_for_block_and_stamp_after) & stamp_mask; uint64_t group_id_bit_offs = j * num_group_id_bits_before; - uint64_t group_id = (util::SafeLoadAs(block_base + bytes_status_ + - (group_id_bit_offs >> 3)) >> - (group_id_bit_offs & 7)) & - group_id_mask_before; + uint64_t group_id = + (util::SafeLoadAs(block_base + bytes_status_in_block_ + + (group_id_bit_offs >> 3)) >> + (group_id_bit_offs & 7)) & + group_id_mask_before; uint64_t slot_id_new = i * 16u + ihalf * 8u + full_slots_new[ihalf]; hashes_new[slot_id_new] = hash; uint8_t* block_base_new = double_block_base_new + ihalf * block_size_after; block_base_new[7 - full_slots_new[ihalf]] = stamp_new; int64_t group_id_bit_offs_new = full_slots_new[ihalf] * num_group_id_bits_after; - uint64_t* ptr = reinterpret_cast(block_base_new + bytes_status_ + - (group_id_bit_offs_new >> 3)); + uint64_t* ptr = reinterpret_cast( + block_base_new + bytes_status_in_block_ + (group_id_bit_offs_new >> 3)); util::SafeStore(ptr, util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7))); full_slots_new[ihalf]++; @@ -709,10 +710,11 @@ Status SwissTable::grow_double() { } uint64_t group_id_bit_offs = j * num_group_id_bits_before; - uint64_t group_id = (util::SafeLoadAs(block_base + bytes_status_ + - (group_id_bit_offs >> 3)) >> - (group_id_bit_offs & 7)) & - group_id_mask_before; + uint64_t group_id = + (util::SafeLoadAs(block_base + bytes_status_in_block_ + + (group_id_bit_offs >> 3)) >> + (group_id_bit_offs & 7)) & + group_id_mask_before; uint8_t stamp_new = (hash >> bits_shift_for_block_and_stamp_after) & stamp_mask; uint8_t* block_base_new = @@ -731,8 +733,8 @@ Status SwissTable::grow_double() { hashes_new[block_id_new * 8u + full_slots_new] = hash; block_base_new[7 - full_slots_new] = stamp_new; int64_t group_id_bit_offs_new = full_slots_new * num_group_id_bits_after; - uint64_t* ptr = reinterpret_cast(block_base_new + bytes_status_ + - (group_id_bit_offs_new >> 3)); + uint64_t* ptr = reinterpret_cast( + block_base_new + bytes_status_in_block_ + (group_id_bit_offs_new >> 3)); util::SafeStore(ptr, util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7))); } diff --git a/cpp/src/arrow/compute/key_map_internal.h b/cpp/src/arrow/compute/key_map_internal.h index e86bc7f71e660..f6086fd083a9d 100644 --- a/cpp/src/arrow/compute/key_map_internal.h +++ b/cpp/src/arrow/compute/key_map_internal.h @@ -103,7 +103,7 @@ class ARROW_EXPORT SwissTable { } static int64_t num_block_bytes_from_num_groupid_bits(int64_t num_groupid_bits) { - return num_groupid_bits + bytes_status_; + return num_groupid_bits + bytes_status_in_block_; } // Use 32-bit hash for now @@ -205,8 +205,7 @@ class ARROW_EXPORT SwissTable { // Resize large hash tables when 75% full. Status grow_double(); - // TODO: Rename to bytes_status_in_block_. - static constexpr int bytes_status_ = 8; + static constexpr int bytes_status_in_block_ = 8; // Number of hash bits stored in slots in a block. // The highest bits of hash determine block id. diff --git a/cpp/src/arrow/compute/key_map_internal_avx2.cc b/cpp/src/arrow/compute/key_map_internal_avx2.cc index 7ed46dbb05731..78df03f5e636d 100644 --- a/cpp/src/arrow/compute/key_map_internal_avx2.cc +++ b/cpp/src/arrow/compute/key_map_internal_avx2.cc @@ -394,7 +394,8 @@ int SwissTable::extract_group_ids_avx2(const int num_keys, const uint32_t* hashe : num_groupid_bytes == 2 ? 0xFFFF : 0xFFFFFFFF; int64_t num_block_bytes = num_block_bytes_from_num_groupid_bits(num_groupid_bits); - const int* slots_base = reinterpret_cast(blocks_->data() + bytes_status_); + const int* slots_base = + reinterpret_cast(blocks_->data() + bytes_status_in_block_); for (int i = 0; i < num_keys / unroll; ++i) { __m256i hash = _mm256_loadu_si256(reinterpret_cast(hashes) + i);