Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change generation of 2^n values for fixed point conversions. #1261

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 37 additions & 33 deletions simde/arm/neon/cvt_n.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
*
* Copyright:
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
*
* Note: pow(2, n) does not produce exact powers of two when the
* floating-point rounding mode is anything other than round-to-nearest,
* so exact integer shifts (UINT64_C(1) << n) are used instead wherever
* n < 64 allows it.
* See https://github.com/simd-everywhere/simde/issues/1260
*/

#if !defined(SIMDE_ARM_NEON_CVT_N_H)
Expand All @@ -40,7 +44,7 @@ simde_vcvth_n_u16_f16(simde_float16_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
return simde_vcvth_u16_f16(
simde_float16_from_float32(
simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
#define simde_vcvth_n_u16_f16(a, n) vcvth_n_u16_f16(a, n)
Expand All @@ -56,7 +60,7 @@ simde_vcvth_n_f16_s16(int16_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
return simde_float16_from_float32(
HEDLEY_STATIC_CAST(simde_float32_t,
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)));
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n)));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
#define simde_vcvth_n_f16_s16(a, n) vcvth_n_f16_s16(a, n)
Expand All @@ -72,7 +76,7 @@ simde_vcvth_n_f16_u16(uint16_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
return simde_float16_from_float32(
HEDLEY_STATIC_CAST(simde_float32_t,
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)));
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n)));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
#define simde_vcvth_n_f16_u16(a, n) vcvth_n_f16_u16(a, n)
Expand All @@ -86,7 +90,7 @@ SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vcvts_n_s32_f32(simde_float32_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvts_n_s32_f32(a, n) vcvts_n_s32_f32(a, n)
Expand All @@ -100,7 +104,7 @@ SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcvts_n_u32_f32(simde_float32_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvts_n_u32_f32(a, n) vcvts_n_u32_f32(a, n)
Expand All @@ -115,7 +119,7 @@ simde_float32_t
simde_vcvts_n_f32_s32(int32_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
return HEDLEY_STATIC_CAST(simde_float32_t,
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n));
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvts_n_f32_s32(a, n) vcvts_n_f32_s32(a, n)
Expand All @@ -130,7 +134,7 @@ simde_float32_t
simde_vcvts_n_f32_u32(uint32_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
return HEDLEY_STATIC_CAST(simde_float32_t,
HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n));
HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvts_n_f32_u32(a, n) vcvts_n_f32_u32(a, n)
Expand All @@ -144,7 +148,7 @@ SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vcvtd_n_s64_f64(simde_float64_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
return simde_vcvtd_s64_f64(a * simde_math_pow(2, n));
return simde_vcvtd_s64_f64(a * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvtd_n_s64_f64(a, n) vcvtd_n_s64_f64(a, n)
Expand All @@ -158,7 +162,7 @@ SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcvtd_n_u64_f64(simde_float64_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
return simde_vcvtd_u64_f64(a * simde_math_pow(2, n));
return simde_vcvtd_u64_f64(a * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvtd_n_u64_f64(a, n) vcvtd_n_u64_f64(a, n)
Expand All @@ -172,7 +176,7 @@ SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vcvtd_n_f64_s64(int64_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n);
return HEDLEY_STATIC_CAST(simde_float64_t, a) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvtd_n_f64_s64(a, n) vcvtd_n_f64_s64(a, n)
Expand All @@ -186,7 +190,7 @@ SIMDE_FUNCTION_ATTRIBUTES
simde_float64_t
simde_vcvtd_n_f64_u64(uint64_t a, const int n)
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n);
return HEDLEY_STATIC_CAST(simde_float64_t, a) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define simde_vcvtd_n_f64_u64(a, n) vcvtd_n_f64_u64(a, n)
Expand All @@ -205,7 +209,7 @@ simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
}

return simde_int32x2_from_private(r_);
Expand All @@ -227,7 +231,7 @@ simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n));
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_int64x1_from_private(r_);
Expand All @@ -251,7 +255,7 @@ simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n)
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32(
simde_float16_to_float32(a_.values[i]) *
HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
}

return simde_uint16x4_from_private(r_);
Expand All @@ -273,7 +277,7 @@ simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
}

return simde_uint32x2_from_private(r_);
Expand All @@ -295,7 +299,7 @@ simde_vcvt_n_u64_f64(simde_float64x1_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n));
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_uint64x1_from_private(r_);
Expand All @@ -317,7 +321,7 @@ simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
}

return simde_int32x4_from_private(r_);
Expand All @@ -339,7 +343,7 @@ simde_vcvtq_n_s64_f64(simde_float64x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n));
r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_int64x2_from_private(r_);
Expand All @@ -363,7 +367,7 @@ simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n)
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32(
simde_float16_to_float32(a_.values[i]) *
HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
}

return simde_uint16x8_from_private(r_);
Expand All @@ -385,7 +389,7 @@ simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
}

return simde_uint32x4_from_private(r_);
Expand All @@ -407,7 +411,7 @@ simde_vcvtq_n_u64_f64(simde_float64x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n));
r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_uint64x2_from_private(r_);
Expand All @@ -429,7 +433,7 @@ simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
}

return simde_float16x4_from_private(r_);
Expand All @@ -451,7 +455,7 @@ simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
}

return simde_float16x4_from_private(r_);
Expand All @@ -473,7 +477,7 @@ simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
}

return simde_float16x8_from_private(r_);
Expand All @@ -495,7 +499,7 @@ simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / simde_math_pow(2, n))));
r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
}

return simde_float16x8_from_private(r_);
Expand All @@ -517,7 +521,7 @@ simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
}

return simde_float32x2_from_private(r_);
Expand All @@ -539,7 +543,7 @@ simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
}

return simde_float32x2_from_private(r_);
Expand All @@ -561,7 +565,7 @@ simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_float64x1_from_private(r_);
Expand All @@ -583,7 +587,7 @@ simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_float64x2_from_private(r_);
Expand All @@ -605,7 +609,7 @@ simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_float64x1_from_private(r_);
Expand All @@ -627,7 +631,7 @@ simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
}

return simde_float64x2_from_private(r_);
Expand All @@ -649,7 +653,7 @@ simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
}

return simde_float32x4_from_private(r_);
Expand All @@ -671,7 +675,7 @@ simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n)

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
}

return simde_float32x4_from_private(r_);
Expand Down
Loading