From 1e9692c5512002bb296475d7f68412e0bb6f5749 Mon Sep 17 00:00:00 2001
From: Steven Munroe
Date: Thu, 15 Sep 2022 19:18:02 -0500
Subject: [PATCH] Resolve FTBFS [Bug 2113609] pveclib in rawhide/f37

The switch to GCC-12 exposed some latent issues in pveclib-1.0.4-4:

1) The (accidental) typedef of __float128 over __ieee128 and the cast
of (vector unsigned int *) constants to (vector unsigned __int128 *)
seem to violate strict-aliasing. This is not reported unless
-Wstrict-aliasing=2 is used, so the result was silent generation of
incorrect code. GCC PR 106755.

2) GCC-12 corrected a long-latent problem with vec_cpsgn (swapped
operands). PVECLIB followed the implementation from previous GCC
versions (GCC <= 11). This broke the vector float/double unit tests.
GCC PR 101984.

3) The implementation of IEEE Float128 is evolving and the various
types (__float128, __ieee128, _Float128, ...) are highly dependent on
the compiler version/options used. The implementation (once again)
tries to support these variations. The API uses __binary128 and
avoids _Float128.

* src/pveclib/vec_f128_ppc.h (__binary128): Define/typedef to the
quad-precision float type that the compiler supports.
* src/pveclib/vec_f32_ppc.h (vec_copysignf32): Unless
PVECLIB_CPSGN_FIXED is defined, avoid vec_cpsgn.
* src/pveclib/vec_f64_ppc.h (vec_copysignf64): Unless
PVECLIB_CPSGN_FIXED is defined, avoid vec_cpsgn.
* src/testsuite/arith128_test_f32.c (test_float_cpsgn): Swap
operands/results to match GCC-12.
* src/testsuite/arith128_test_f64.c (test_double_cpsgn): Swap
operands/results to match GCC-12.
* src/testsuite/arith128_test_i128.c (test_muludq): Change local
variable 'l' type to match vec_muludq parameter.
(test_madduq): Change local variable 'l' type to match vec_madduq
parameter.
* src/testsuite/vec_f128_dummy.c: Replace all usage of type
__Float128 with __binary128.

Signed-off-by: Steven Munroe
---
 src/pveclib/vec_f128_ppc.h         | 23 +++++++++++++--
 src/pveclib/vec_f32_ppc.h          | 37 ++++++++++++++++++------
 src/pveclib/vec_f64_ppc.h          | 46 ++++++++++++++++++++++--------
 src/testsuite/arith128_test_f32.c  | 14 ++++-----
 src/testsuite/arith128_test_f64.c  | 26 ++++++++---------
 src/testsuite/arith128_test_i128.c |  6 ++--
 src/testsuite/vec_f128_dummy.c     | 24 ++++++++--------
 7 files changed, 120 insertions(+), 56 deletions(-)
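The strict-aliasing hazard from (1) is easy to reproduce in isolation.
The following is a minimal sketch, not code from this patch: the
typedefs mirror pveclib's, the constant is arbitrary, and the function
names are hypothetical. Building bad_load with -Wstrict-aliasing=2
should draw the warning; without the option GCC-12 may silently
generate incorrect code (GCC PR 106755). It assumes a compiler/target
that supports vector __int128 (POWER8/VSX or later).

#include <altivec.h>

typedef __vector unsigned int vui32_t;
typedef __vector unsigned __int128 vui128_t;

/* An arbitrary 128-bit constant expressed as four 32-bit words.  */
static const vui32_t k128 = { 0x00000001, 0x00000002,
                              0x00000003, 0x00000004 };

vui128_t
bad_load (void)
{
  /* Dereferencing a (vui128_t *) that really points at a vui32_t
     object accesses it through an incompatible lvalue type.  */
  return *((vui128_t *) &k128);
}

vui128_t
safe_load (void)
{
  /* A vector value cast is a bit-exact reinterpretation and does
     not violate the aliasing rules.  */
  return (vui128_t) k128;
}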
diff --git a/src/pveclib/vec_f128_ppc.h b/src/pveclib/vec_f128_ppc.h
index b5398db..7bb96a3 100644
--- a/src/pveclib/vec_f128_ppc.h
+++ b/src/pveclib/vec_f128_ppc.h
@@ -195,12 +195,31 @@ test_cosf128 (__binary128 value)
    -mcu=power9 and -mfloat128.
    So far clang does not support/define the __ibm128 type. */
 #ifdef __FLOAT128__
-typedef __float128 __Float128;
+#ifndef __clang__
+// For now assume that not __clang__ implies GCC
+// Can't just #ifdef __GNUC__ as Clang defines it
+#ifdef __float128
+// Can assume GCC 7 or later so ...
+// That version defines __ieee128 internally and
+// #defines __float128 to __ieee128, so both are defined
+// Define __binary128 so both GCC and Clang can use a single type
+#define __binary128 __ieee128
+#else
+// Assume GCC 6 or earlier
+// So the compiler defines __float128 only
 typedef __float128 __binary128;
 typedef __float128 __ieee128;
-#ifndef __clang__
+#endif
+#if (__GNUC__ < 7)
+typedef __float128 _Float128;
+#endif
 typedef __ibm128 __IBM128;
 #else
+/* Clang started defining __FLOAT128__ and does not allow redefining
+   __float128 or __ieee128. Worse, it will give errors if you try to
+   use either type. So define __binary128 as if __FLOAT128__ is not
+   defined. */
+typedef vui128_t __binary128;
 /* Clang does not define __ibm128 over IBM long double.
    So defined it here. */
 typedef long double __IBM128;
diff --git a/src/pveclib/vec_f32_ppc.h b/src/pveclib/vec_f32_ppc.h
index 8a146c5..2cd2c89 100644
--- a/src/pveclib/vec_f32_ppc.h
+++ b/src/pveclib/vec_f32_ppc.h
@@ -774,31 +774,52 @@ vec_any_iszerof32 (vf32_t vf32)
 #endif
 }
 
-/** \brief Copy the sign bit from vf32y merged with magnitude from
- * vf32x and return the resulting vector float values.
+/** \brief Copy the sign bit from vf32x merged with magnitude from
+ * vf32y and return the resulting vector float values.
+ *
+ * \note This operation was patterned after the intrinsic vec_cpsgn
+ * (altivec.h) introduced for POWER7 and VSX. It turns out the
+ * original (GCC 4.9) compiler implementation reversed the operands
+ * and does not match the PowerISA or the Vector Intrinsic
+ * Programming Reference manuals. Subsequent compilers and PVECLIB
+ * implementations replicated this (operand order) error.
+ * This has now been reported as a bug against the compilers, which
+ * are in the process of applying fixes and distributing updates.
+ * This version of PVECLIB is updated to match the Vector Intrinsic
+ * Programming Reference. This implementation is independent of the
+ * compiler's update status.
  *
  * |processor|Latency|Throughput|
  * |--------:|:-----:|:---------|
  * |power8   | 6-7   | 2/cycle  |
  * |power9   | 2     | 2/cycle  |
  *
- * @param vf32x vector float values containing the magnitudes.
- * @param vf32y vector float values containing the sign bits.
- * @return vector float values with magnitude from vf32x and the
- * sign of vf32y.
+ * @param vf32x vector float values containing the sign bits.
+ * @param vf32y vector float values containing the magnitudes.
+ * @return vector float values with magnitude from vf32y and the
+ * sign of vf32x.
  */
 static inline vf32_t
 vec_copysignf32 (vf32_t vf32x, vf32_t vf32y)
 {
 #if _ARCH_PWR7
-  /* P9 has a 2 cycle xvcpsgnsp and eliminates a const load. */
+#ifdef PVECLIB_CPSGN_FIXED
   return (vec_cpsgn (vf32x, vf32y));
+#else
+  vf32_t result;
+  __asm__(
+      "xvcpsgnsp %x0,%x1,%x2;\n"
+      : "=wa" (result)
+      : "wa" (vf32x), "wa" (vf32y)
+      :);
+  return (result);
+#endif
 #else
   const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000,
 					   0x80000000, 0x80000000);
   vf32_t result;
 
-  result = (vf32_t)vec_sel ((vui32_t)vf32x, (vui32_t)vf32y, signmask);
+  result = (vf32_t)vec_sel ((vui32_t)vf32y, (vui32_t)vf32x, signmask);
   return (result);
 #endif
 }
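With the corrected operand order above, vec_copysignf32 (x, y) takes
the sign from x and the magnitude from y, so each result lane should
equal the scalar copysignf (y[i], x[i]). A hedged usage sketch
follows; it is not part of the patch, and the include path assumes an
installed pveclib built with VSX enabled (e.g. -mcpu=power8):

#include <math.h>
#include <stdio.h>
#include <pveclib/vec_f32_ppc.h>

int
main (void)
{
  vf32_t sign = { -0.0f, 0.0f, -1.0f, 2.0f };
  vf32_t mag  = {  1.0f, 2.0f,  3.0f, 4.0f };
  vf32_t r = vec_copysignf32 (sign, mag);

  /* Lane-by-lane the result should match scalar copysignf with
     the magnitude first and the sign second.  */
  for (int i = 0; i < 4; i++)
    if (r[i] != copysignf (mag[i], sign[i]))
      printf ("lane %d mismatch: %f\n", i, (double) r[i]);
  return 0;
}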
diff --git a/src/pveclib/vec_f64_ppc.h b/src/pveclib/vec_f64_ppc.h
index 69fbd05..b599e39 100644
--- a/src/pveclib/vec_f64_ppc.h
+++ b/src/pveclib/vec_f64_ppc.h
@@ -773,31 +773,53 @@ vec_any_iszerof64 (vf64_t vf64)
 #endif
 }
 
-/** \brief Copy the sign bit from vf64y merged with magnitude from
- * vf64x and return the resulting vector double values.
+/** \brief Copy the sign bit from vf64x merged with magnitude from
+ * vf64y and return the resulting vector double values.
+ *
+ * \note This operation was patterned after the intrinsic vec_cpsgn
+ * (altivec.h) introduced for POWER7 and VSX. It turns out the
+ * original (GCC 4.9) compiler implementation reversed the operands
+ * and does not match the PowerISA or the Vector Intrinsic
+ * Programming Reference manuals. Subsequent compilers and PVECLIB
+ * implementations replicated this (operand order) error.
+ * This has now been reported as a bug against the compilers, which
+ * are in the process of applying fixes and distributing updates.
+ * This version of PVECLIB is updated to match the Vector Intrinsic
+ * Programming Reference. This implementation is independent of the
+ * compiler's update status.
  *
  * |processor|Latency|Throughput|
  * |--------:|:-----:|:---------|
  * |power8   | 6-7   | 2/cycle  |
  * |power9   | 2     | 2/cycle  |
  *
- * @param vf64x vector double values containing the magnitudes.
- * @param vf64y vector double values containing the sign bits.
- * @return vector double values with magnitude from vf64x and the
- * sign of vf64y.
+ * @param vf64x vector double values containing the sign bits.
+ * @param vf64y vector double values containing the magnitudes.
+ * @return vector double values with magnitude from vf64y and the
+ * sign of vf64x.
  */
 static inline vf64_t
-vec_copysignf64 (vf64_t vf64x , vf64_t vf64y)
+vec_copysignf64 (vf64_t vf64x, vf64_t vf64y)
 {
 #if _ARCH_PWR7
   /* P9 has a 2 cycle xvcpsgndp and eliminates a const load. */
-  return (vec_cpsgn (vf64x, vf64y));
+#ifdef PVECLIB_CPSGN_FIXED
+  return (vec_cpsgn (vf64x, vf64y));
+#else
+  vf64_t result;
+  __asm__(
+      "xvcpsgndp %x0,%x1,%x2;\n"
+      : "=wa" (result)
+      : "wa" (vf64x), "wa" (vf64y)
+      :);
+  return (result);
+#endif
 #else
-  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0x80000000, 0);
-  vf64_t result;
+  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0x80000000, 0);
+  vf64_t result;
 
-  result = (vf64_t)vec_sel ((vui32_t)vf64x, (vui32_t)vf64y, signmask);
-  return (result);
+  result = (vf64_t) vec_sel ((vui32_t) vf64y, (vui32_t) vf64x, signmask);
+  return (result);
 #endif
 }
diff --git a/src/testsuite/arith128_test_f32.c b/src/testsuite/arith128_test_f32.c
index dd65f90..016bd54 100644
--- a/src/testsuite/arith128_test_f32.c
+++ b/src/testsuite/arith128_test_f32.c
@@ -1186,7 +1186,7 @@ test_float_cpsgn (void)
 
   i = (vf32_t) { 0.0, -0.0, 0.0, -0.0 };
   j = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
-  e = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
+  e = (vf32_t) { 0.0, -0.0, 0.0, -0.0 };
   k = vec_copysignf32 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v4f32x ("vec_copysignf32 1:", k, e);
 
-  i = (vf32_t) { __FLT_MAX__, __FLT_MIN__, __FLT_EPSILON__,
+  i = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
+  j = (vf32_t) { __FLT_MAX__, __FLT_MIN__, __FLT_EPSILON__,
 		  __FLT_DENORM_MIN__ };
-  j = (vf32_t) {-0.0, 0.0, -0.0, 0.0 };
   e = (vf32_t) { -(__FLT_MAX__), __FLT_MIN__, -(__FLT_EPSILON__),
 		  __FLT_DENORM_MIN__ };
   k = vec_copysignf32 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v4f32x ("vec_copysignf32 2:", k, e);
 
-  i = (vf32_t) CONST_VINT128_W(__FLOAT_INF, __FLOAT_NINF, __FLOAT_INF,
+  i = (vf32_t) CONST_VINT32_W(0.0, -0.0, 0.0, -0.0);
+  j = (vf32_t) CONST_VINT128_W(__FLOAT_INF, __FLOAT_NINF, __FLOAT_INF,
 			       __FLOAT_NINF);
-  j = (vf32_t) CONST_VINT32_W(0.0, -0.0, 0.0, -0.0);
   e = (vf32_t) CONST_VINT128_W(__FLOAT_INF, __FLOAT_NINF, __FLOAT_INF,
 			       __FLOAT_NINF);
   k = vec_copysignf32 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v4f32x ("vec_copysignf32 3:", k, e);
 
-  i = (vf32_t) CONST_VINT128_W(__FLOAT_NAN, __FLOAT_NNAN, __FLOAT_NSNAN,
+  i = (vf32_t) {-0.0, 0.0, 0.0, -0.0 };
+  j = (vf32_t) CONST_VINT128_W(__FLOAT_NAN, __FLOAT_NNAN, __FLOAT_NSNAN,
 			       __FLOAT_SNAN);
-  j = (vf32_t) {-0.0, 0.0, 0.0, -0.0 };
   e = (vf32_t) CONST_VINT128_W(__FLOAT_NNAN, __FLOAT_NAN, __FLOAT_SNAN,
 			       __FLOAT_NSNAN);
   k = vec_copysignf32 (i, j);
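The pre-VSX fallback in both headers merges sign and magnitude with
vec_sel and a mask that has only the sign bits set. The bit selection
is easy to model in scalar code; the sketch below is illustrative
only, and sel32 is a hypothetical helper, not a pveclib operation:

#include <stdint.h>
#include <stdio.h>

/* Same bitwise semantics as vec_sel: where a mask bit is clear the
   result bit comes from a, where it is set the bit comes from b.  */
static inline uint32_t
sel32 (uint32_t a, uint32_t b, uint32_t mask)
{
  return (a & ~mask) | (b & mask);
}

int
main (void)
{
  const uint32_t signmask = 0x80000000u;
  uint32_t mag  = 0x40490fdbu;  /* bit pattern of +3.14159274f */
  uint32_t sign = 0x80000000u;  /* bit pattern of -0.0f */

  /* Expect 0xc0490fdb: the magnitude of pi with the sign of -0.0f,
     matching result = vec_sel (magnitude, sign-source, signmask).  */
  printf ("0x%08x\n", (unsigned int) sel32 (mag, sign, signmask));
  return 0;
}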
diff --git a/src/testsuite/arith128_test_f64.c b/src/testsuite/arith128_test_f64.c
index eb77700..1bec02e 100644
--- a/src/testsuite/arith128_test_f64.c
+++ b/src/testsuite/arith128_test_f64.c
@@ -1596,7 +1596,7 @@ test_double_cpsgn (void)
 
   i = (vf64_t) { 0.0, -0.0 };
   j = (vf64_t) { -0.0, 0.0 };
-  e = (vf64_t) { -0.0, 0.0 };
+  e = (vf64_t) { 0.0, -0.0 };
   k = vec_copysignf64 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v2f64x ("vec_copysignf64 1:", k, e);
 
-  i = (vf64_t) { __DBL_MAX__, __DBL_MIN__ };
-  j = (vf64_t) { -0.0, 0.0 };
+  i = (vf64_t) { -0.0, 0.0 };
+  j = (vf64_t) { __DBL_MAX__, __DBL_MIN__ };
   e = (vf64_t) { -(__DBL_MAX__), __DBL_MIN__ };
   k = vec_copysignf64 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v2f64x ("vec_copysignf64 2:", k, e);
 
-  i = (vf64_t) { __DBL_EPSILON__, __DBL_DENORM_MIN__ };
-  j = (vf64_t) { -0.0, 0.0 };
+  i = (vf64_t) { -0.0, 0.0 };
+  j = (vf64_t) { __DBL_EPSILON__, __DBL_DENORM_MIN__ };
   e = (vf64_t) { -(__DBL_EPSILON__), __DBL_DENORM_MIN__ };
   k = vec_copysignf64 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v2f64x ("vec_copysignf64 3:", k, e);
 
-  i = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
-  j = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
+  i = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
+  j = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
   e = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
   k = vec_copysignf64 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v2f64x ("vec_copysignf64 4:", k, e);
 
-  i = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
-  j = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
+  i = (vf64_t) CONST_VINT64_DW(0.0, -0.0);
+  j = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
   e = (vf64_t) CONST_VINT128_DW(__DOUBLE_INF, __DOUBLE_NINF);
   k = vec_copysignf64 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v2f64x ("vec_copysignf64 5:", k, e);
 
-  i = (vf64_t) CONST_VINT128_DW(__DOUBLE_NAN, __DOUBLE_NNAN);
-  j = (vf64_t) CONST_VINT64_DW( -0.0, 0.0 );
+  i = (vf64_t) CONST_VINT64_DW( -0.0, 0.0 );
+  j = (vf64_t) CONST_VINT128_DW(__DOUBLE_NAN, __DOUBLE_NNAN);
   e = (vf64_t) CONST_VINT128_DW(__DOUBLE_NNAN, __DOUBLE_NAN);
   k = vec_copysignf64 (i, j);
 
 #ifdef __DEBUG_PRINT__
 #endif
   rc += check_v2f64x ("vec_copysignf64 6:", k, e);
 
-  i = (vf64_t) CONST_VINT128_DW(__DOUBLE_NSNAN, __DOUBLE_SNAN);
-  j = (vf64_t) CONST_VINT64_DW ( 0.0, -0.0 );
+  i = (vf64_t) CONST_VINT64_DW ( 0.0, -0.0 );
+  j = (vf64_t) CONST_VINT128_DW(__DOUBLE_NSNAN, __DOUBLE_SNAN);
   e = (vf64_t) CONST_VINT128_DW(__DOUBLE_SNAN, __DOUBLE_NSNAN);
   k = vec_copysignf64 (i, j);
diff --git a/src/testsuite/arith128_test_i128.c b/src/testsuite/arith128_test_i128.c
index b0a5c0b..a3de901 100644
--- a/src/testsuite/arith128_test_i128.c
+++ b/src/testsuite/arith128_test_i128.c
@@ -2308,7 +2308,8 @@ test_msumudm (void)
 int
 test_muludq (void)
 {
-  vui32_t i, j, k, l /*, m*/;
+  vui32_t i, j, k/*, l , m*/;
+  vui128_t l;
   vui32_t e, ec;
   int rc = 0;
 
@@ -2383,7 +2384,8 @@
 int
 test_madduq (void)
 {
-  vui32_t i, j, k, l, m, n;
+  vui32_t i, j, k, m, n;
+  vui128_t l;
   vui32_t e, ec;
   int rc = 0;
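The type change for 'l' above follows from the vec_muludq/vec_madduq
interfaces, which produce a 256-bit product through a vui128_t return
value plus a vui128_t * parameter. Under the old lax aliasing the
vui32_t local happened to work; with GCC-12 the types must match. A
hedged sketch follows, assuming the vec_muludq prototype in
vec_int128_ppc.h returns the low 128 bits and stores the high 128
bits through the pointer (check the header for the exact split):

#include <pveclib/vec_int128_ppc.h>

void
mul256 (vui128_t a, vui128_t b, vui128_t *hi, vui128_t *lo)
{
  /* 'l' must be vui128_t to match vec_muludq's pointer parameter.  */
  vui128_t l;

  /* Assumed split: return value = low half, *(&l) = high half.  */
  *lo = vec_muludq (&l, a, b);
  *hi = l;
}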
diff --git a/src/testsuite/vec_f128_dummy.c b/src/testsuite/vec_f128_dummy.c
index adbb004..29fee5a 100644
--- a/src/testsuite/vec_f128_dummy.c
+++ b/src/testsuite/vec_f128_dummy.c
@@ -224,37 +224,37 @@ test_cosf128 (__binary128 value)
 #endif
 
 vb128_t
-_test_f128_isinff128 (__Float128 value)
+_test_f128_isinff128 (__binary128 value)
 {
   return (vec_isinff128 (value));
 }
 
 int
-_test_f128_isinf_sign (__Float128 value)
+_test_f128_isinf_sign (__binary128 value)
 {
   return (vec_isinf_signf128 (value));
 }
 
 vb128_t
-_test_f128_isnan (__Float128 value)
+_test_f128_isnan (__binary128 value)
 {
   return (vec_isnanf128 (value));
 }
 
 vb128_t
-_test_pred_f128_finite (__Float128 value)
+_test_pred_f128_finite (__binary128 value)
 {
   return (vec_isfinitef128 (value));
 }
 
 vb128_t
-_test_pred_f128_normal (__Float128 value)
+_test_pred_f128_normal (__binary128 value)
 {
   return (vec_isnormalf128 (value));
 }
 
 vb128_t
-_test_pred_f128_subnormal (__Float128 value)
+_test_pred_f128_subnormal (__binary128 value)
 {
   return (vec_issubnormalf128 (value));
 }
@@ -274,31 +274,31 @@ _test_xfer_bin128_2_vui16t (__binary128 f128)
 // __clang__ has a bug whenever -mfloat128 is enabled, maybe clang 10.0.1
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 int
-test_gcc_f128_signbit (__Float128 value)
+test_gcc_f128_signbit (__binary128 value)
 {
   return (signbit(value));
 }
 
 int
-test_gcc_f128_isinf (__Float128 value)
+test_gcc_f128_isinf (__binary128 value)
 {
   return (isinf(value));
 }
 
 int
-test_gcc_float128_isnan (__Float128 value)
+test_gcc_float128_isnan (__binary128 value)
 {
   return (isnan(value));
 }
 
-__Float128
-test_gcc_f128_copysign (__Float128 valx, __Float128 valy)
+__binary128
+test_gcc_f128_copysign (__binary128 valx, __binary128 valy)
 {
   return (__builtin_copysignf128(valx, valy));
 }
 
 int
-test_glibc_f128_classify (__Float128 value)
+test_glibc_f128_classify (__binary128 value)
 {
   if (isfinite(value))
     return 1;