Skip to content

Commit

Permalink
arm neon qdmull: Fix SQDMULL implementation for 32-bit inputs. (#1255)
Browse files Browse the repository at this point in the history
The non-vectorized SQDMULL implementation was wrong for 32-bit
inputs.  It incorrectly checked one of the operands to see if the
value would overflow before doubling it, not the result of the
initial multiplication.  It now matches the 16-bit operand version,
and also matches hardware.  A test has been added for the scalar
form of the function, testing a range of values that will saturate
when multiplied and doubled.  This set of test vectors was produced
on an ARMv9 machine (Google Cloud box), failed tests on x86 with the
existing code, and passes with the modified code.
  • Loading branch information
Syonyk authored Jan 3, 2025
1 parent ed042d5 commit 948b236
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 1 deletion.
2 changes: 1 addition & 1 deletion simde/arm/neon/qdmull.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ simde_vqdmulls_s32(int32_t a, int32_t b) {
return vqdmulls_s32(a, b);
#else
int64_t mul = (HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b));
return ((a > 0 ? a : -a) & (HEDLEY_STATIC_CAST(int64_t, 1) << 62)) ? ((mul < 0) ? INT64_MIN : INT64_MAX) : mul << 1;
return (simde_math_llabs(mul) & (HEDLEY_STATIC_CAST(int64_t, 1) << 62)) ? ((mul < 0) ? INT64_MIN : INT64_MAX) : mul << 1;
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand Down
85 changes: 85 additions & 0 deletions test/arm/neon/qdmull.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,94 @@ test_simde_vqdmull_s32 (SIMDE_MUNIT_TEST_ARGS) {
}


static int
test_simde_vqdmulls_s32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
static const struct {
int32_t a;
int32_t b;
int64_t r;
} test_vec[] = {
{ INT32_MAX,
INT32_MAX,
INT64_C( 9223372028264841218) },
{ INT32_MAX,
INT32_C( 0),
INT64_C( 0) },
{ INT32_MAX,
INT32_MIN,
-INT64_C( 9223372032559808512) },
{ INT32_MAX,
-INT32_C( 2147483647),
-INT64_C( 9223372028264841218) },
{ INT32_C( 0),
INT32_MAX,
INT64_C( 0) },
{ INT32_C( 0),
INT32_C( 0),
INT64_C( 0) },
{ INT32_C( 0),
INT32_MIN,
INT64_C( 0) },
{ INT32_C( 0),
-INT32_C( 2147483647),
INT64_C( 0) },
{ INT32_MIN,
INT32_MAX,
-INT64_C( 9223372032559808512) },
{ INT32_MIN,
INT32_C( 0),
INT64_C( 0) },
{ INT32_MIN,
INT32_MIN,
INT64_MAX },
{ INT32_MIN,
-INT32_C( 2147483647),
INT64_C( 9223372032559808512) },
{ -INT32_C( 2147483647),
INT32_MAX,
-INT64_C( 9223372028264841218) },
{ -INT32_C( 2147483647),
INT32_C( 0),
INT64_C( 0) },
{ -INT32_C( 2147483647),
INT32_MIN,
INT64_C( 9223372032559808512) },
{ -INT32_C( 2147483647),
-INT32_C( 2147483647),
INT64_C( 9223372028264841218) },
};

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
int64_t r = simde_vqdmulls_s32(test_vec[i].a, test_vec[i].b);

simde_assert_equal_i64(r, test_vec[i].r);
}

return 0;
#else
int32_t vals[4] = {0x7fffffff, 0, 0x80000000, 0x80000001};

fputc('\n', stdout);
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; j++)
{
int64_t r = simde_vqdmulls_s32(vals[i], vals[j]);

simde_test_codegen_write_i32(2, vals[i], SIMDE_TEST_VEC_POS_FIRST);
simde_test_codegen_write_i32(2, vals[j], SIMDE_TEST_VEC_POS_MIDDLE);
simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST);
}
}
return 1;
#endif
}

SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_s16)
SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_s32)
SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulls_s32)
SIMDE_TEST_FUNC_LIST_END

#include "test-neon-footer.h"

0 comments on commit 948b236

Please sign in to comment.