From d4712c8def19a3a637affe9b1e1ababe4a4fa514 Mon Sep 17 00:00:00 2001 From: Mykola Hohsadze Date: Tue, 27 Aug 2024 14:12:10 +0300 Subject: [PATCH] Update AArch64 features to Linux 6.10.6 (#359) --- include/cpuinfo_aarch64.h | 43 +++++++++++++++++++++++ include/internal/hwcaps.h | 18 ++++++++++ src/impl_aarch64__base_implementation.inl | 24 ++++++++++++- test/cpuinfo_aarch64_test.cc | 18 ++++++++++ 4 files changed, 102 insertions(+), 1 deletion(-) diff --git a/include/cpuinfo_aarch64.h b/include/cpuinfo_aarch64.h index 5da31f79..826f0abb 100644 --- a/include/cpuinfo_aarch64.h +++ b/include/cpuinfo_aarch64.h @@ -192,6 +192,31 @@ typedef struct { int smef16f16 : 1; // FP16 to FP16 outer product. int mops : 1; // Standardized memory operations. int hbc : 1; // Hinted conditional branches. + int sveb16b16 : 1; // Non-widening BFloat16 to BFloat16 arithmetic for SVE2 + // and SME2. + int lrcpc3 : 1; // Load-Acquire RCpc instructions version 3. + int lse128 : 1; // 128-bit Atomics. + int fpmr : 1; // Floating-point Mode Register. + int lut : 1; // Lookup table instructions with 2-bit and 4-bit indices. + int faminmax : 1; // Maximum and minimum absolute value instructions. + int f8cvt : 1; // FP scaling instructions and FP8 convert instructions. + int f8fma : 1; // FP8 to single-precision and half-precision + // multiply-accumulate instructions. + int f8dp4 : 1; // FP8 to single-precision 4-way dot product FDOT (4-way) + // instructions. + int f8dp2 : 1; // FP8 to half-precision 2-way dot product FDOT (2-way) + // instructions. + int f8e4m3 : 1; // Arm FP8 E4M3 format. + int f8e5m2 : 1; // Arm FP8 E5M2 format. + int smelutv2 : 1; // SME2 lookup table LUTI4 and MOVT instructions. + int smef8f16 : 1; // SME2 F8F16 instructions. + int smef8f32 : 1; // SME2 F8F32 instructions. + int smesf8fma : 1; // SVE2 FP8 to single-precision and half-precision + // multiply-accumulate instructions. 
+ int smesf8dp4 : 1; // SVE2 FP8 to single-precision 4-way dot product FDOT + // (4-way) instructions. + int smesf8dp2 : 1; // SVE2 FP8 to half-precision 2-way dot product FDOT + // (2-way) instructions. // Make sure to update Aarch64FeaturesEnum below if you add a field here. } Aarch64Features; @@ -288,6 +313,24 @@ typedef enum { AARCH64_SME_F16F16, AARCH64_MOPS, AARCH64_HBC, + AARCH64_SVE_B16B16, + AARCH64_LRCPC3, + AARCH64_LSE128, + AARCH64_FPMR, + AARCH64_LUT, + AARCH64_FAMINMAX, + AARCH64_F8CVT, + AARCH64_F8FMA, + AARCH64_F8DP4, + AARCH64_F8DP2, + AARCH64_F8E4M3, + AARCH64_F8E5M2, + AARCH64_SME_LUTV2, + AARCH64_SME_F8F16, + AARCH64_SME_F8F32, + AARCH64_SME_SF8FMA, + AARCH64_SME_SF8DP4, + AARCH64_SME_SF8DP2, AARCH64_LAST_, } Aarch64FeaturesEnum; diff --git a/include/internal/hwcaps.h b/include/internal/hwcaps.h index 9d2a8a6f..2490e3e0 100644 --- a/include/internal/hwcaps.h +++ b/include/internal/hwcaps.h @@ -106,6 +106,24 @@ CPU_FEATURES_START_CPP_NAMESPACE #define AARCH64_HWCAP2_SME_F16F16 (1UL << 42) #define AARCH64_HWCAP2_MOPS (1UL << 43) #define AARCH64_HWCAP2_HBC (1UL << 44) +#define AARCH64_HWCAP2_SVE_B16B16 (1UL << 45) +#define AARCH64_HWCAP2_LRCPC3 (1UL << 46) +#define AARCH64_HWCAP2_LSE128 (1UL << 47) +#define AARCH64_HWCAP2_FPMR (1UL << 48) +#define AARCH64_HWCAP2_LUT (1UL << 49) +#define AARCH64_HWCAP2_FAMINMAX (1UL << 50) +#define AARCH64_HWCAP2_F8CVT (1UL << 51) +#define AARCH64_HWCAP2_F8FMA (1UL << 52) +#define AARCH64_HWCAP2_F8DP4 (1UL << 53) +#define AARCH64_HWCAP2_F8DP2 (1UL << 54) +#define AARCH64_HWCAP2_F8E4M3 (1UL << 55) +#define AARCH64_HWCAP2_F8E5M2 (1UL << 56) +#define AARCH64_HWCAP2_SME_LUTV2 (1UL << 57) +#define AARCH64_HWCAP2_SME_F8F16 (1UL << 58) +#define AARCH64_HWCAP2_SME_F8F32 (1UL << 59) +#define AARCH64_HWCAP2_SME_SF8FMA (1UL << 60) +#define AARCH64_HWCAP2_SME_SF8DP4 (1UL << 61) +#define AARCH64_HWCAP2_SME_SF8DP2 (1UL << 62) // http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h #define 
ARM_HWCAP_SWP (1UL << 0) diff --git a/src/impl_aarch64__base_implementation.inl b/src/impl_aarch64__base_implementation.inl index c55c8e29..16960a92 100644 --- a/src/impl_aarch64__base_implementation.inl +++ b/src/impl_aarch64__base_implementation.inl @@ -115,7 +115,29 @@ LINE(AARCH64_SME_F16F16, smef16f16, "smef16f16", 0, \ AARCH64_HWCAP2_SME_F16F16) \ LINE(AARCH64_MOPS, mops, "mops", 0, AARCH64_HWCAP2_MOPS) \ - LINE(AARCH64_HBC, hbc, "hbc", 0, AARCH64_HWCAP2_HBC) + LINE(AARCH64_HBC, hbc, "hbc", 0, AARCH64_HWCAP2_HBC) \ + LINE(AARCH64_SVE_B16B16, sveb16b16, "sveb16b16", 0, \ + AARCH64_HWCAP2_SVE_B16B16) \ + LINE(AARCH64_LRCPC3, lrcpc3, "lrcpc3", 0, AARCH64_HWCAP2_LRCPC3) \ + LINE(AARCH64_LSE128, lse128, "lse128", 0, AARCH64_HWCAP2_LSE128) \ + LINE(AARCH64_FPMR, fpmr, "fpmr", 0, AARCH64_HWCAP2_FPMR) \ + LINE(AARCH64_LUT, lut, "lut", 0, AARCH64_HWCAP2_LUT) \ + LINE(AARCH64_FAMINMAX, faminmax, "faminmax", 0, AARCH64_HWCAP2_FAMINMAX) \ + LINE(AARCH64_F8CVT, f8cvt, "f8cvt", 0, AARCH64_HWCAP2_F8CVT) \ + LINE(AARCH64_F8FMA, f8fma, "f8fma", 0, AARCH64_HWCAP2_F8FMA) \ + LINE(AARCH64_F8DP4, f8dp4, "f8dp4", 0, AARCH64_HWCAP2_F8DP4) \ + LINE(AARCH64_F8DP2, f8dp2, "f8dp2", 0, AARCH64_HWCAP2_F8DP2) \ + LINE(AARCH64_F8E4M3, f8e4m3, "f8e4m3", 0, AARCH64_HWCAP2_F8E4M3) \ + LINE(AARCH64_F8E5M2, f8e5m2, "f8e5m2", 0, AARCH64_HWCAP2_F8E5M2) \ + LINE(AARCH64_SME_LUTV2, smelutv2, "smelutv2", 0, AARCH64_HWCAP2_SME_LUTV2) \ + LINE(AARCH64_SME_F8F16, smef8f16, "smef8f16", 0, AARCH64_HWCAP2_SME_F8F16) \ + LINE(AARCH64_SME_F8F32, smef8f32, "smef8f32", 0, AARCH64_HWCAP2_SME_F8F32) \ + LINE(AARCH64_SME_SF8FMA, smesf8fma, "smesf8fma", 0, \ + AARCH64_HWCAP2_SME_SF8FMA) \ + LINE(AARCH64_SME_SF8DP4, smesf8dp4, "smesf8dp4", 0, \ + AARCH64_HWCAP2_SME_SF8DP4) \ + LINE(AARCH64_SME_SF8DP2, smesf8dp2, "smesf8dp2", 0, AARCH64_HWCAP2_SME_SF8DP2) + #define INTROSPECTION_PREFIX Aarch64 #define INTROSPECTION_ENUM_PREFIX AARCH64 #include "define_introspection_and_hwcaps.inl" diff --git 
a/test/cpuinfo_aarch64_test.cc b/test/cpuinfo_aarch64_test.cc index e191a145..f1f55943 100644 --- a/test/cpuinfo_aarch64_test.cc +++ b/test/cpuinfo_aarch64_test.cc @@ -312,6 +312,24 @@ CPU revision : 3)"); EXPECT_FALSE(info.features.smef16f16); EXPECT_FALSE(info.features.mops); EXPECT_FALSE(info.features.hbc); + EXPECT_FALSE(info.features.sveb16b16); + EXPECT_FALSE(info.features.lrcpc3); + EXPECT_FALSE(info.features.lse128); + EXPECT_FALSE(info.features.fpmr); + EXPECT_FALSE(info.features.lut); + EXPECT_FALSE(info.features.faminmax); + EXPECT_FALSE(info.features.f8cvt); + EXPECT_FALSE(info.features.f8fma); + EXPECT_FALSE(info.features.f8dp4); + EXPECT_FALSE(info.features.f8dp2); + EXPECT_FALSE(info.features.f8e4m3); + EXPECT_FALSE(info.features.f8e5m2); + EXPECT_FALSE(info.features.smelutv2); + EXPECT_FALSE(info.features.smef8f16); + EXPECT_FALSE(info.features.smef8f32); + EXPECT_FALSE(info.features.smesf8fma); + EXPECT_FALSE(info.features.smesf8dp4); + EXPECT_FALSE(info.features.smesf8dp2); } #elif defined(CPU_FEATURES_OS_MACOS) TEST_F(CpuidAarch64Test, FromDarwinSysctlFromName) {