Skip to content

Commit

Permalink
arm neon sli_n: Fix invalid shift warnings (#1253)
Browse files Browse the repository at this point in the history
Per the ARMv8 manual, the valid range of shifts for the vector SLI
operations is "0 to the element width in bits minus 1."  The existing
SIMDe implementation produces an invalid shift when n is 0, because the
mask constant is shifted right by (element width - n) - so, for a 0-bit
shift on a 64-bit value, the shift count is 64.  Shifting by a count
greater than or equal to the width of the type is undefined behavior per
the C standard, and it leads to compiler warnings on build.

This fix changes the sli_n mask computation to be well-defined over the
whole valid range of n: the mask is now shifted right by
((element width - 1) - n), and the mask constant has its top bit cleared
(0x7f... instead of 0xff...) so that the results are unchanged for
n >= 1 and the mask is 0 for n == 0.

While the existing tests all pass with the change, a number of the
tests have been modified (and have new constant values generated) to
properly exercise and demonstrate the "n == 0" shift case.  These
test vectors were generated on an ARMv9 system (Google Compute Engine
C4A system), and pass on x86 hardware as well.
  • Loading branch information
Syonyk authored Jan 2, 2025
1 parent e958b0a commit 8067442
Show file tree
Hide file tree
Showing 2 changed files with 560 additions and 456 deletions.
16 changes: 8 additions & 8 deletions simde/arm/neon/sli_n.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ SIMDE_BEGIN_DECLS_
#define simde_vslid_n_u64(a, b, n) vslid_n_u64(a, b, n)
#else
#define simde_vslid_n_u64(a, b, n) \
(((a & (UINT64_C(0xffffffffffffffff) >> (64 - n))) | simde_vshld_n_u64((b), (n))))
(((a & (UINT64_C(0x7fffffffffffffff) >> (63 - n))) | simde_vshld_n_u64((b), (n))))
#endif
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vslid_n_u64
Expand Down Expand Up @@ -103,7 +103,7 @@ SIMDE_BEGIN_DECLS_
#else
#define simde_vsli_n_u16(a, b, n) \
simde_vorr_u16( \
simde_vand_u16((a), simde_vdup_n_u16((UINT16_C(0xffff) >> (16 - n)))), \
simde_vand_u16((a), simde_vdup_n_u16((UINT16_C(0x7fff) >> (15 - n)))), \
simde_vshl_n_u16((b), (n)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -129,7 +129,7 @@ SIMDE_BEGIN_DECLS_
#define simde_vsli_n_u32(a, b, n) \
simde_vorr_u32( \
simde_vand_u32((a), \
simde_vdup_n_u32((UINT32_C(0xffffffff) >> (32 - n)))), \
simde_vdup_n_u32((UINT32_C(0x7fffffff) >> (31 - n)))), \
simde_vshl_n_u32((b), (n)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -155,7 +155,7 @@ SIMDE_BEGIN_DECLS_
#define simde_vsli_n_u64(a, b, n) \
simde_vorr_u64( \
simde_vand_u64((a), simde_vdup_n_u64( \
(UINT64_C(0xffffffffffffffff) >> (64 - n)))), \
(UINT64_C(0x7fffffffffffffff) >> (63 - n)))), \
simde_vshl_n_u64((b), (n)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -180,7 +180,7 @@ SIMDE_BEGIN_DECLS_
#else
#define simde_vsliq_n_u8(a, b, n) \
simde_vorrq_u8( \
simde_vandq_u8((a), simde_vdupq_n_u8((UINT8_C(0xff) >> (8 - n)))), \
simde_vandq_u8((a), simde_vdupq_n_u8((UINT8_C(0x7f) >> (7 - n)))), \
simde_vshlq_n_u8((b), (n)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -205,7 +205,7 @@ SIMDE_BEGIN_DECLS_
#else
#define simde_vsliq_n_u16(a, b, n) \
simde_vorrq_u16( \
simde_vandq_u16((a), simde_vdupq_n_u16((UINT16_C(0xffff) >> (16 - n)))), \
simde_vandq_u16((a), simde_vdupq_n_u16((UINT16_C(0x7fff) >> (15 - n)))), \
simde_vshlq_n_u16((b), (n)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -231,7 +231,7 @@ SIMDE_BEGIN_DECLS_
#define simde_vsliq_n_u32(a, b, n) \
simde_vorrq_u32( \
simde_vandq_u32((a), \
simde_vdupq_n_u32((UINT32_C(0xffffffff) >> (32 - n)))), \
simde_vdupq_n_u32((UINT32_C(0x7fffffff) >> (31 - n)))), \
simde_vshlq_n_u32((b), (n)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -257,7 +257,7 @@ SIMDE_BEGIN_DECLS_
#define simde_vsliq_n_u64(a, b, n) \
simde_vorrq_u64( \
simde_vandq_u64((a), simde_vdupq_n_u64( \
(UINT64_C(0xffffffffffffffff) >> (64 - n)))), \
(UINT64_C(0x7fffffffffffffff) >> (63 - n)))), \
simde_vshlq_n_u64((b), (n)))
#endif
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand Down
Loading

0 comments on commit 8067442

Please sign in to comment.