From 6545090bcabfbcb328cad2555c4ebac21e5a4fa6 Mon Sep 17 00:00:00 2001
From: sharkautarch <128002472+sharkautarch@users.noreply.github.com>
Date: Thu, 29 Aug 2024 21:13:24 -0400
Subject: [PATCH] Remove global -ffast-math flag, but apply fast math to just
 color_helpers.h/color_helpers.cpp

Turn on FTZ/DAZ inside create_color_mgmt_luts()
---
 meson.build              |  4 ----
 src/Utils/Directives.h   | 60 ++++++++++++++++++++++++++++++++++++++++
 src/color_bench.cpp      |  1 +
 src/color_helpers.cpp    |  6 ++++-
 src/color_helpers_impl.h |  8 ++++++-
 src/color_tests.cpp      |  4 +++-
 src/steamcompmgr.cpp     |  3 ++-
 7 files changed, 78 insertions(+), 8 deletions(-)
 create mode 100644 src/Utils/Directives.h

diff --git a/meson.build b/meson.build
index b562ec3512..9d0b487eb0 100644
--- a/meson.build
+++ b/meson.build
@@ -37,10 +37,6 @@ add_project_arguments(cppc.get_supported_arguments([
   '-Wno-missing-braces',
 ]), language: 'cpp')
 
-add_project_arguments(cppc.get_supported_arguments([
-  '-ffast-math',
-]), language: 'cpp')
-
 pipewire_dep = dependency('libpipewire-0.3', required: get_option('pipewire'))
 librt_dep = cppc.find_library('rt', required : get_option('pipewire'))
 hwdata_dep = dependency('hwdata', required : false)
diff --git a/src/Utils/Directives.h b/src/Utils/Directives.h
new file mode 100644
index 0000000000..c5bbb301bb
--- /dev/null
+++ b/src/Utils/Directives.h
@@ -0,0 +1,60 @@
+#pragma once
+
+// Scoped floating-point helpers:
+//  - SET_FAST_MATH_FLAGS declares a FlagSwitcher that enables flush-to-zero /
+//    denormals-are-zero and restores the saved control register when the
+//    enclosing scope exits (expands to nothing on unsupported targets).
+//  - FAST_MATH_ON / FAST_MATH_OFF bracket a file-scope region compiled with
+//    relaxed floating-point semantics (expand to nothing on other compilers).
+
+// NOTE(review): GCC/Clang predefine __i386__ (not __x86__) for 32-bit x86 - confirm whether 32-bit builds are meant to take this branch.
+#if defined(__x86__) || defined(__x86_64__)
+#include <xmmintrin.h>
+#include <pmmintrin.h>
+#define SET_FAST_MATH_FLAGS FlagSwitcher switcher{};
+#define SET_FLUSH_AND_ZERO_TO_ON _mm_setcsr( _mm_getcsr() | (_MM_DENORMALS_ZERO_ON | _MM_FLUSH_ZERO_ON) )
+// Saves MXCSR on construction, enables FTZ/DAZ, and writes the saved value back on destruction.
+struct FlagSwitcher {
+	unsigned int m_csr;
+	FlagSwitcher() : m_csr{_mm_getcsr()} {
+		SET_FLUSH_AND_ZERO_TO_ON; // the macro carries no ';' of its own
+	}
+	~FlagSwitcher() {
+		_mm_setcsr(m_csr);
+	}
+};
+#elif defined(__aarch64__) && __has_builtin(__builtin_aarch64_get_fpcr64) && __has_builtin(__builtin_aarch64_set_fpcr64)
+#define SET_FAST_MATH_FLAGS FlagSwitcher switcher{};
+
+static constexpr unsigned long long fz_bit = 0x1'00'00'00;
+//based on this stuff: https://github.com/DLTcollab/sse2neon/blob/706d3b58025364c2371cafcf9b16e32ff7e630ed/sse2neon.h#L2433
+//and this: https://stackoverflow.com/a/59001820
+static constexpr unsigned long long dz_bit = 0x8'00'00;
+// Saves FPCR on construction, sets fz_bit | dz_bit, and writes the saved value back on destruction.
+struct FlagSwitcher {
+	unsigned long long m_csr;
+	FlagSwitcher() : m_csr{__builtin_aarch64_get_fpcr64()} {
+		__builtin_aarch64_set_fpcr64(m_csr | fz_bit | dz_bit);
+	}
+	~FlagSwitcher() {
+		__builtin_aarch64_set_fpcr64(m_csr);
+	}
+};
+
+#else
+#define SET_FAST_MATH_FLAGS
+
+#endif
+
+#ifdef __clang__
+#define FAST_MATH_ON _Pragma("float_control(push)"); \
+	_Pragma("float_control(precise, off)") //https://clang.llvm.org/docs/LanguageExtensions.html#extensions-to-specify-floating-point-flags
+#define FAST_MATH_OFF _Pragma("float_control(pop)")
+#elif defined(__GNUC__)
+#define FAST_MATH_ON _Pragma("GCC push_options"); \
+	_Pragma("GCC optimize(\"-ffast-math\")")
+#define FAST_MATH_OFF _Pragma("GCC pop_options")
+#else
+#define FAST_MATH_ON
+#define FAST_MATH_OFF
+#endif
diff --git a/src/color_bench.cpp b/src/color_bench.cpp
index 105898629a..28fea287e6 100644
--- a/src/color_bench.cpp
+++ b/src/color_bench.cpp
@@ -17,6 +17,7 @@ lut3d_t lut3d_float;
 
 static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
 {
+    SET_FAST_MATH_FLAGS
     const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
     const glm::vec2 white = { 0.3070f, 0.3220f };
     const glm::vec2 destVirtualWhite = { 0.f, 0.f };
diff --git a/src/color_helpers.cpp b/src/color_helpers.cpp
index 2075eca463..5fac79c7ac 100644
--- a/src/color_helpers.cpp
+++ b/src/color_helpers.cpp
@@ -1,6 +1,8 @@
 #define COLOR_HELPERS_CPP
 #include "color_helpers_impl.h"
 
+FAST_MATH_ON
+
 #include
 #include
 #include
@@ -214,7 +216,7 @@ inline void lerp_rgb(float* out, const float* a, const float* b, const float* c,
 
 inline float ClampAndSanitize( float a, float min, float max )
 {
-#ifndef __FAST_MATH__
+#if !( defined(__FAST_MATH__) || ( defined(__FINITE_MATH_ONLY__) && __FINITE_MATH_ONLY__ ) ) // GCC/Clang always define __FINITE_MATH_ONLY__ (0 or 1): test its value
     return std::isfinite( a ) ? std::min(std::max(min, a), max) : min;
 #else
     return std::min(std::max(min, a), max);
@@ -910,3 +912,5 @@ const glm::mat3 k_xyz_from_2020 = normalised_primary_matrix( displaycolorimetry_
 const glm::mat3 k_2020_from_xyz = glm::inverse( k_xyz_from_2020 );
 
 const glm::mat3 k_2020_from_709 = k_2020_from_xyz * k_xyz_from_709;
+
+FAST_MATH_OFF
\ No newline at end of file
diff --git a/src/color_helpers_impl.h b/src/color_helpers_impl.h
index 3e8c2d5a5e..22352a23ae 100644
--- a/src/color_helpers_impl.h
+++ b/src/color_helpers_impl.h
@@ -1,4 +1,8 @@
 #pragma once
+#include "Utils/Directives.h"
+
+FAST_MATH_ON
+
 #include "color_helpers.h"
 
 namespace rendervulkan {
@@ -17,4 +21,6 @@ namespace ns_color_tests {
 
 #ifdef COLOR_HELPERS_CPP
 	REGISTER_LUT_EDGE_SIZE(rendervulkan::s_nLutEdgeSize3d);
-#endif
\ No newline at end of file
+#endif
+
+FAST_MATH_OFF
\ No newline at end of file
diff --git a/src/color_tests.cpp b/src/color_tests.cpp
index 2d682bf661..6ee0804a31 100644
--- a/src/color_tests.cpp
+++ b/src/color_tests.cpp
@@ -1,4 +1,4 @@
-#include "color_helpers.h"
+#include "color_helpers_impl.h"
 #include
 //#include
 
@@ -16,6 +16,7 @@ lut3d_t lut3d_float;
 
 static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
 {
+    SET_FAST_MATH_FLAGS
     const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
     const glm::vec2 white = { 0.3070f, 0.3220f };
 
@@ -232,6 +233,7 @@ void test_eetf2390_mono()
 
 int main(int argc, char* argv[])
 {
+    SET_FAST_MATH_FLAGS
     printf("color_tests\n");
     // test_eetf2390_mono();
     color_tests();
diff --git a/src/steamcompmgr.cpp b/src/steamcompmgr.cpp
index a8d959f78a..a9f0b36d9f 100644
--- a/src/steamcompmgr.cpp
+++ b/src/steamcompmgr.cpp
@@ -224,6 +224,7 @@ static const gamescope_color_mgmt_t k_ScreenshotColorMgmtHDR =
 static void
 create_color_mgmt_luts(const gamescope_color_mgmt_t& newColorMgmt, gamescope_color_mgmt_luts outColorMgmtLuts[ EOTF_Count ])
 {
+	SET_FAST_MATH_FLAGS
 	const displaycolorimetry_t& displayColorimetry = newColorMgmt.displayColorimetry;
 	const displaycolorimetry_t& outputEncodingColorimetry = newColorMgmt.outputEncodingColorimetry;
 
@@ -5069,7 +5070,7 @@ steamcompmgr_latch_frame_done( steamcompmgr_win_t *w, uint64_t vblank_idx )
 
 static inline float santitize_float( float f )
 {
-#ifndef __FAST_MATH__
+#if !( defined(__FAST_MATH__) || ( defined(__FINITE_MATH_ONLY__) && __FINITE_MATH_ONLY__ ) ) // GCC/Clang always define __FINITE_MATH_ONLY__ (0 or 1): test its value
 	return ( std::isfinite( f ) ? f : 0.f );
 #else
 	return f;