Skip to content

Commit

Permalink
arm neon qshl: Fix UQSHL to match hardware. Add extensive test vector…
Browse files Browse the repository at this point in the history
…s. (#1256)

UQSHL was wrong in a variety of ways.  Most importantly, it did not
truncate the shift amount to its low byte, as per the instruction spec:
"... shifts the element by a value from the least significant byte
of the corresponding element of the second source register..."
This was not detected in the tests, as the test vectors were limited
to small shift values that did not demonstrate the fault.

Behavior is fixed to be correct, per hardware.  Test vectors are
dramatically expanded, using both the previous small-range values and
additional full-range values.  Vectors were generated on an ARMv9
system (Google Compute Engine C4A).
  • Loading branch information
Syonyk authored Jan 3, 2025
1 parent 948b236 commit e5d5064
Show file tree
Hide file tree
Showing 2 changed files with 3,524 additions and 644 deletions.
104 changes: 56 additions & 48 deletions simde/arm/neon/qshl.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,22 +187,24 @@ simde_vqshlb_u8(uint8_t a, int8_t b) {
#endif
#else
uint8_t r;
int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b);

if (b < -7)
b = -7;

if (b <= 0) {
r = a >> -b;
} else if (b < 7) {
r = HEDLEY_STATIC_CAST(uint8_t, a << b);
if ((r >> b) != a) {
r = UINT8_MAX;
}
} else if (a == 0) {
if ((b8 <= -8) || (a == 0))
{
r = 0;
} else {
}
else if (b8 >= 8)
{
r = UINT8_MAX;
}
else if (b8 <= 0) {
r = a >> -b8;
} else {
r = HEDLEY_STATIC_CAST(uint8_t, a << b8);
if ((r >> b8) != a) {
r = UINT8_MAX;
}
}

return r;
#endif
Expand All @@ -228,22 +230,24 @@ simde_vqshlh_u16(uint16_t a, int16_t b) {
#endif
#else
uint16_t r;
int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b);

if (b < -15)
b = -15;

if (b <= 0) {
r = a >> -b;
} else if (b < 15) {
r = HEDLEY_STATIC_CAST(uint16_t, a << b);
if ((r >> b) != a) {
r = UINT16_MAX;
}
} else if (a == 0) {
if ((b8 <= -16) || (a == 0))
{
r = 0;
} else {
}
else if (b8 >= 16)
{
r = UINT16_MAX;
}
else if (b8 <= 0) {
r = a >> -b8;
} else {
r = HEDLEY_STATIC_CAST(uint16_t, a << b8);
if ((r >> b8) != a) {
r = UINT16_MAX;
}
}

return r;
#endif
Expand All @@ -269,22 +273,24 @@ simde_vqshls_u32(uint32_t a, int32_t b) {
#endif
#else
uint32_t r;
int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b);

if (b < -31)
b = -31;

if (b <= 0) {
r = HEDLEY_STATIC_CAST(uint32_t, a >> -b);
} else if (b < 31) {
r = a << b;
if ((r >> b) != a) {
r = UINT32_MAX;
}
} else if (a == 0) {
if ((b8 <= -32) || (a == 0))
{
r = 0;
} else {
}
else if (b8 >= 32)
{
r = UINT32_MAX;
}
else if (b8 <= 0) {
r = a >> -b8;
} else {
r = HEDLEY_STATIC_CAST(uint32_t, a << b8);
if ((r >> b8) != a) {
r = UINT32_MAX;
}
}

return r;
#endif
Expand All @@ -310,22 +316,24 @@ simde_vqshld_u64(uint64_t a, int64_t b) {
#endif
#else
uint64_t r;
int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b);

if (b < -63)
b = -63;

if (b <= 0) {
r = a >> -b;
} else if (b < 63) {
r = HEDLEY_STATIC_CAST(uint64_t, a << b);
if ((r >> b) != a) {
r = UINT64_MAX;
}
} else if (a == 0) {
if ((b8 <= -64) || (a == 0))
{
r = 0;
} else {
}
else if (b8 >= 64)
{
r = UINT64_MAX;
}
else if (b8 <= 0) {
r = a >> -b8;
} else {
r = HEDLEY_STATIC_CAST(uint64_t, a << b8);
if ((r >> b8) != a) {
r = UINT64_MAX;
}
}

return r;
#endif
Expand Down
Loading

0 comments on commit e5d5064

Please sign in to comment.