Skip to content

Commit

Permalink
Generic, simple implementation for xsimd::compress
Browse files Browse the repository at this point in the history
Related to #975
  • Loading branch information
serge-sans-paille committed Nov 27, 2023
1 parent 0f47da8 commit 4222a13
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/source/api/data_transfer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ In place:
+---------------------------------------+----------------------------------------------------+
| :cpp:func:`insert` | modify a single batch slot |
+---------------------------------------+----------------------------------------------------+
| :cpp:func:`compress` | pack elements according to a mask |
+---------------------------------------+----------------------------------------------------+

Between batches:

Expand Down
33 changes: 33 additions & 0 deletions include/xsimd/arch/generic/xsimd_generic_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,39 @@ namespace xsimd

using namespace types;

// compress
namespace detail
{
    /**
     * Build the swizzle indices and the keep-mask used to pack the lanes
     * selected by \c bitmask at the beginning of a batch.
     *
     * @tparam IT unsigned integer type used for the index batch.
     * @tparam A architecture tag.
     * @tparam Is lane positions <tt>0 .. N-1</tt>, one per lane of the batch.
     * @param bitmask scalar mask; bit \c i set means lane \c i is selected.
     * @return a pair whose \c first member holds, in its leading lanes, the
     *         indices of the selected lanes in increasing order (the
     *         remaining lanes hold a filler index), and whose \c second
     *         member is true exactly for those leading lanes.
     */
    template <class IT, class A, class I, size_t... Is>
    std::pair<batch<IT, A>, batch_bool<IT, A>> create_compress_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence<Is...>)
    {
        constexpr size_t lane_count = sizeof...(Is);
        alignas(A::alignment()) IT mask_buffer[batch<IT, A>::size];
        // Position of each lane inside the batch: 0, 1, 2, ...
        alignas(A::alignment()) IT position_buffer[batch<IT, A>::size] = { static_cast<IT>(Is)... };

        // Gather the indices of the selected lanes at the front of the buffer.
        size_t inserted = 0;
        for (size_t i = 0; i < lane_count; ++i)
            if ((bitmask >> i) & 1u)
                mask_buffer[inserted++] = static_cast<IT>(i);
        // Fill the unused tail; these lanes are excluded by the keep-mask below.
        for (size_t i = inserted; i < lane_count; ++i)
            mask_buffer[i] = static_cast<IT>(inserted);

        batch<IT, A> mask = batch<IT, A>::load_aligned(&mask_buffer[0]);
        batch<IT, A> positions = batch<IT, A>::load_aligned(&position_buffer[0]);
        // The keep-mask must be derived from the lane *position*, not from the
        // stored source index: a selected source index can be >= inserted
        // (e.g. selecting lanes 1 and 3 stores indices {1, 3}), and comparing
        // the indices themselves would wrongly discard such lanes.
        batch_bool<IT, A> tail = positions < batch<IT, A>(static_cast<IT>(inserted));
        return std::make_pair(mask, tail);
    }
}

/**
 * Generic kernel for \c compress.
 *
 * Converts \c mask to its scalar bit representation, asks
 * \c detail::create_compress_swizzle_mask for a swizzle index batch and a
 * keep-mask, rearranges \c x with the former and zeroes every lane not
 * flagged by the latter.
 */
template <typename A, typename T>
inline batch<T, A>
compress(batch<T, A> const& x, batch_bool<T, A> const& mask,
         kernel::requires_arch<generic>) noexcept
{
    using IT = as_unsigned_integer_t<T>;
    using index_batch = batch<IT, A>;
    constexpr std::size_t size = batch_bool<T, A>::size;

    // first: swizzle indices, second: lanes to keep after the swizzle.
    const auto masks = detail::create_compress_swizzle_mask<IT, A>(mask.mask(), ::xsimd::detail::make_index_sequence<size>());

    // The selection is done on the unsigned integer view of the data.
    const auto packed = bitwise_cast<IT>(swizzle(x, masks.first));
    const index_batch zeros(IT(0));
    return bitwise_cast<T>(select(masks.second, packed, zeros));
}

// extract_pair
template <class A, class T>
inline batch<T, A> extract_pair(batch<T, A> const& self, batch<T, A> const& other, std::size_t i, requires_arch<generic>) noexcept
Expand Down
13 changes: 13 additions & 0 deletions include/xsimd/types/xsimd_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,19 @@ namespace xsimd
return kernel::clip(x, lo, hi, A {});
}

/**
 * @ingroup batch_data_transfer
 *
 * Packs the elements of \c x selected by \c mask next to each other at the
 * beginning of the resulting batch, and sets the remaining slots to zero.
 * @param x batch to pick the elements from.
 * @param mask batch of booleans flagging the elements to keep.
 * @return the packed batch.
 */
template <class T, class A>
inline batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
{
    // Compile-time validation of the (T, A) pair before dispatching.
    detail::static_check_supported_config<T, A>();
    const A arch {};
    return kernel::compress<A>(x, mask, arch);
}

/**
* @ingroup batch_complex
*
Expand Down
96 changes: 96 additions & 0 deletions test/test_shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,102 @@ TEST_CASE_TEMPLATE("[slide]", B, BATCH_INT_TYPES)

#endif

/**
 * Test fixture for xsimd::compress on batch type \c B.
 *
 * \c input holds 0, 1, 2, ...; each method fills \c mask and \c expected
 * for one selection pattern and compares them with the result of
 * xsimd::compress.
 */
template <class B>
struct compress_test
{
    using batch_type = B;
    using value_type = typename B::value_type;
    using mask_batch_type = typename B::batch_bool_type;

    static constexpr size_t size = B::size;
    std::array<value_type, size> input;
    std::array<bool, size> mask;
    std::array<value_type, size> expected;

    compress_test()
    {
        for (size_t i = 0; i < size; ++i)
        {
            input[i] = static_cast<value_type>(i);
        }
    }

    // Every lane selected: the output is the input.
    void full()
    {
        std::fill(mask.begin(), mask.end(), true);

        for (size_t i = 0; i < size; ++i)
            expected[i] = input[i];

        auto b = xsimd::compress(
            batch_type::load_unaligned(input.data()),
            mask_batch_type::load_unaligned(mask.data()));
        CHECK_BATCH_EQ(b, expected);
    }

    // No lane selected: the output is all zeros.
    void empty()
    {
        std::fill(mask.begin(), mask.end(), false);

        for (size_t i = 0; i < size; ++i)
            expected[i] = value_type(0);

        auto b = xsimd::compress(
            batch_type::load_unaligned(input.data()),
            mask_batch_type::load_unaligned(mask.data()));
        CHECK_BATCH_EQ(b, expected);
    }

    // Every second lane selected, starting with lane 0.
    void interleave()
    {
        for (size_t i = 0; i < size; ++i)
            mask[i] = i % 2 == 0;

        // Lanes 0, 2, 4, ... are kept, i.e. ceil(size / 2) of them.
        const size_t kept = (size + 1) / 2;
        for (size_t i = 0; i < size; ++i)
            expected[i] = i < kept ? input[2 * i] : value_type(0);

        auto b = xsimd::compress(
            batch_type::load_unaligned(input.data()),
            mask_batch_type::load_unaligned(mask.data()));
        CHECK_BATCH_EQ(b, expected);
    }

    // Every third lane selected, starting with lane 0.
    void generic()
    {
        for (size_t i = 0; i < size; ++i)
            mask[i] = i % 3 == 0;

        // Lanes 0, 3, 6, ... are kept, i.e. ceil(size / 3) of them. Rounding
        // down would drop the last kept lane whenever size is not a multiple
        // of 3 (e.g. lane 3 of a 4-lane batch).
        const size_t kept = (size + 2) / 3;
        for (size_t i = 0; i < size; ++i)
            expected[i] = i < kept ? input[3 * i] : value_type(0);

        auto b = xsimd::compress(
            batch_type::load_unaligned(input.data()),
            mask_batch_type::load_unaligned(mask.data()));
        CHECK_BATCH_EQ(b, expected);
    }
};

// Runs the compress_test fixture for the floating point batch types and for
// 32-bit and 64-bit signed and unsigned integer batches.
TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
{
compress_test<B> Test;
SUBCASE("empty")
{
Test.empty();
}
SUBCASE("full")
{
Test.full();
}
// NOTE(review): the two partial-mask subcases below are disabled, so only
// the all-false and all-true masks are exercised here. Presumably they were
// turned off because they do not pass yet -- confirm before re-enabling.
// SUBCASE("interleave")
//{
// Test.interleave();
// }
// SUBCASE("generic")
//{
// Test.generic();
// }
}

template <class B>
struct shuffle_test
{
Expand Down

0 comments on commit 4222a13

Please sign in to comment.