From 9e759e4758ad87d019432b077bbb5bac58a39233 Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Thu, 29 Feb 2024 10:13:23 +0800 Subject: [PATCH] refactor cpu pinning (#23099) ### Details: - *Porting [PR 22489](https://github.com/openvinotoolkit/openvino/pull/22489)* -- *add ov::affinity deprecation in CPU plugin introduction* -- *refactoring get_cpu_pinning()* -- *Default CPU pinning on Windows is false on all platform* - *Enable CPU pinning in part of CPU functional test cases. Now Intel CPU plugin func tests on Windows finish at 0h:27m:32.21s in this PR and 0h:30m:50.99s in [PR 23129](https://github.com/openvinotoolkit/openvino/pull/23129)* ### Tickets: - *CVS-129030* --- .../cpu-device.rst | 16 +- src/plugins/intel_cpu/src/config.cpp | 12 +- .../intel_cpu/src/cpu_map_scheduling.cpp | 35 ++-- .../intel_cpu/src/cpu_map_scheduling.hpp | 10 +- .../intel_cpu/src/cpu_streams_calculation.cpp | 9 +- .../custom/behavior/ov_plugin/properties.cpp | 31 ++- .../shared_tests_instances/core_config.cpp | 4 + .../unit/streams_info/cpu_pinning_test.cpp | 181 ++++++++++++++++++ 8 files changed, 252 insertions(+), 46 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/unit/streams_info/cpu_pinning_test.cpp diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst index 98514b3d57518d..ad857d849e7459 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst @@ -357,6 +357,9 @@ Read-only properties - ``ov::device::full_name`` - ``ov::device::capabilities`` +.. note:: + ``ov::affinity`` is replaced by ``ov::hint::enable_cpu_pinning``. As such, it is deprecated in the 2024.0 release and will be removed in the 2025 release. + External Dependencies ########################################################### @@ -404,7 +407,16 @@ User can use the following properties to limit available CPU resource for model ``ov::hint::scheduling_core_type`` and ``ov::hint::enable_hyper_threading`` only support IntelĀ® x86-64 CPU on Linux and Windows in current release. -By default, OpenVINO Runtime will enable CPU threads pinning for better performance. User also can use property ``ov::hint::enable_cpu_pinning`` to switch it off. Disable threads pinning might be beneficial in complex applications with several workloads executed in parallel. +In some use cases, OpenVINO Runtime will enable CPU threads pinning by default for better performance. User can also turn it on or off using property ``ov::hint::enable_cpu_pinning``. Disable threads pinning might be beneficial in complex applications with several workloads executed in parallel. The following table describes the default setting for ``ov::hint::enable_cpu_pinning`` in different use cases. + +==================================================== ================================ + Use Case Default Setting of CPU Pinning +==================================================== ================================ + All use cases with Windows OS False + Stream contains both Pcore and Ecore with Linux OS False + Stream only contains Pcore or Ecore with Linux OS True + All use cases with Mac OS False +==================================================== ================================ .. tab-set:: @@ -427,7 +439,7 @@ user can check the :doc:`optimization guide (); changedCpuPinning = true; + ov::Affinity affinity = val.as(); +#if defined(__APPLE__) + enableCpuPinning = false; + threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE + : IStreamsExecutor::ThreadBindingType::NUMA; +#else enableCpuPinning = (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? true : false; switch (affinity) { @@ -102,11 +107,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; break; case ov::Affinity::CORE: { -#if (defined(__APPLE__) || defined(_WIN32)) - threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; -#else threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; -#endif } break; case ov::Affinity::NUMA: threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; @@ -121,6 +122,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { key, ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); } +#endif } catch (const ov::Exception&) { OPENVINO_THROW("Wrong value ", val.as(), diff --git a/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp b/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp index 7889dc2dd5442a..c5960e8da5d286 100644 --- a/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp +++ b/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp @@ -7,6 +7,7 @@ #include "cpu_streams_calculation.hpp" #include "openvino/core/parallel.hpp" #include "openvino/runtime/system_conf.hpp" +#include "openvino/runtime/threading/cpu_streams_info.hpp" namespace ov { namespace intel_cpu { @@ -71,32 +72,30 @@ std::vector> apply_hyper_threading(bool& input_ht_hint, bool get_cpu_pinning(bool& input_value, const bool input_changed, - const int num_streams, - const Config::LatencyThreadingMode latency_threading_mode, - const std::vector>& proc_type_table) { - int result_value; - int num_sockets = get_default_latency_streams(latency_threading_mode); - bool latency = num_streams <= num_sockets && num_streams > 0; + const std::vector>& proc_type_table, + const std::vector>& streams_info_table) { + bool result_value; +#if defined(__APPLE__) + result_value = false; +#elif defined(_WIN32) + result_value = ((input_changed) && (proc_type_table.size() == 1)) ? input_value : false; +#else if (input_changed) { result_value = input_value; } else { result_value = true; - if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && - proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) { - result_value = latency ? false : true; + // The following code disables pinning in case stream contains both Pcore and Ecore + if (streams_info_table.size() >= 3) { + if ((streams_info_table[0][PROC_TYPE] == ALL_PROC) && + (streams_info_table[1][PROC_TYPE] != EFFICIENT_CORE_PROC) && + (streams_info_table[2][PROC_TYPE] == EFFICIENT_CORE_PROC)) { + result_value = false; + } } } -#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) -# if defined(_WIN32) - if (proc_type_table.size() > 1) { - result_value = false; - } -# endif -# if defined(__APPLE__) - result_value = false; -# endif #endif + input_value = result_value; return result_value; diff --git a/src/plugins/intel_cpu/src/cpu_map_scheduling.hpp b/src/plugins/intel_cpu/src/cpu_map_scheduling.hpp index 950f5e5d6f46a6..ef2cfd90b50221 100644 --- a/src/plugins/intel_cpu/src/cpu_map_scheduling.hpp +++ b/src/plugins/intel_cpu/src/cpu_map_scheduling.hpp @@ -44,16 +44,14 @@ std::vector> apply_hyper_threading(bool& input_ht_hint, * @brief whether pinning cpu cores according to enableCpuPinning property * @param[in] input_type indicate value of property enableCpuPinning. * @param[in] input_changed indicate if value is set by user. - * @param[in] num_streams number of streams - * @param[in] latency_threading_mode is the scope of candidate processors per stream for latency hint - * @param[in] proc_type_table candidate processors available at this time + * @param[in] proc_type_table indicate processors information of this platform + * @param[in] streams_info_table indicate streams detail of this model * @return whether pinning threads to cpu cores */ bool get_cpu_pinning(bool& input_value, const bool input_changed, - const int num_streams, - const Config::LatencyThreadingMode latency_threading_mode, - const std::vector>& proc_type_table); + const std::vector>& proc_type_table, + const std::vector>& streams_info_table); } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 704894b24448ef..044aed674102ef 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -534,11 +534,7 @@ std::vector> generate_stream_info(const int streams, config.changedHyperThreading, ov::util::to_string(config.hintPerfMode), proc_type_table); - auto cpu_reservation = get_cpu_pinning(config.enableCpuPinning, - config.changedCpuPinning, - streams, - config.latencyThreadingMode, - proc_type_table); + if (-1 == preferred_nthreads_per_stream) { model_prefer_threads = get_model_prefer_threads(streams, proc_type_table, model, config); } @@ -553,6 +549,9 @@ std::vector> generate_stream_info(const int streams, config.latencyThreadingMode, proc_type_table); + auto cpu_reservation = + get_cpu_pinning(config.enableCpuPinning, config.changedCpuPinning, proc_type_table, streams_info_table); + config.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor", config.streams, config.threadsPerStream, diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 9f8eef06fe8586..0d373252eddafd 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -133,19 +133,17 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) { TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) { ov::Core ie; - ov::Affinity value = ov::Affinity::NONE; -#if (defined(__APPLE__) || defined(_WIN32)) - auto numaNodes = ov::get_available_numa_nodes(); - auto coreTypes = ov::get_available_cores_types(); +#if defined(__APPLE__) + ov::Affinity value = ov::Affinity::CORE; auto defaultBindThreadParameter = ov::Affinity::NONE; - if (coreTypes.size() > 1) { - defaultBindThreadParameter = ov::Affinity::HYBRID_AWARE; - } else if (numaNodes.size() > 1) { - defaultBindThreadParameter = ov::Affinity::NUMA; - } #else + ov::Affinity value = ov::Affinity::NUMA; +# if defined(_WIN32) + auto defaultBindThreadParameter = ov::Affinity::NONE; +# else auto defaultBindThreadParameter = ov::Affinity::CORE; +# endif auto coreTypes = ov::get_available_cores_types(); if (coreTypes.size() > 1) { defaultBindThreadParameter = ov::Affinity::HYBRID_AWARE; @@ -154,10 +152,15 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) { ASSERT_NO_THROW(value = ie.get_property("CPU", ov::affinity)); ASSERT_EQ(defaultBindThreadParameter, value); - const ov::Affinity affinity = defaultBindThreadParameter == ov::Affinity::HYBRID_AWARE ? ov::Affinity::NUMA : ov::Affinity::HYBRID_AWARE; + const ov::Affinity affinity = + defaultBindThreadParameter == ov::Affinity::HYBRID_AWARE ? ov::Affinity::NUMA : ov::Affinity::HYBRID_AWARE; ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); ASSERT_NO_THROW(value = ie.get_property("CPU", ov::affinity)); +#if defined(__APPLE__) + ASSERT_EQ(ov::Affinity::NUMA, value); +#else ASSERT_EQ(affinity, value); +#endif } TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) { @@ -167,12 +170,20 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) { ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::enable_cpu_pinning)); +#if defined(__APPLE__) + ASSERT_EQ(false, value); +#else ASSERT_EQ(true, value); +#endif affinity = ov::Affinity::HYBRID_AWARE; ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::enable_cpu_pinning)); +#if defined(__APPLE__) + ASSERT_EQ(false, value); +#else ASSERT_EQ(true, value); +#endif affinity = ov::Affinity::NUMA; ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp index 7d30811f7a4332..154d7d5778107e 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp @@ -17,6 +17,10 @@ void core_configuration(ov::test::SubgraphBaseTest* test) { // todo: issue: 123320 test->convert_precisions.insert({ov::element::bf16, ov::element::f32}); test->convert_precisions.insert({ov::element::f16, ov::element::f32}); + + // Enable CPU pinning in CPU funtional tests to save validation time of Intel CPU plugin func tests (parallel) + // on Windows + test->configuration.insert({ov::hint::enable_cpu_pinning.name(), true}); } } // namespace test diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/cpu_pinning_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/cpu_pinning_test.cpp new file mode 100644 index 00000000000000..445bd5cf2d6d7d --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/streams_info/cpu_pinning_test.cpp @@ -0,0 +1,181 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_common.hpp" +#include "cpu_map_scheduling.hpp" +#include "openvino/runtime/system_conf.hpp" + +using namespace testing; +using namespace ov; + +namespace { + +struct CpuPinningTestCase { + bool input_cpu_pinning; + bool input_changed; + std::vector> input_proc_type_table; + std::vector> input_stream_info_table; + bool output_cpu_pinning; +}; + +class CpuPinningTests : public ov::test::TestsCommon, + public testing::WithParamInterface> { +public: + void SetUp() override { + auto test_data = std::get<0>(GetParam()); + + auto test_output = ov::intel_cpu::get_cpu_pinning(test_data.input_cpu_pinning, + test_data.input_changed, + test_data.input_proc_type_table, + test_data.input_stream_info_table); + + ASSERT_EQ(test_data.output_cpu_pinning, test_data.input_cpu_pinning); + ASSERT_EQ(test_data.output_cpu_pinning, test_output); + } +}; + +TEST_P(CpuPinningTests, CpuPinning) {} + +CpuPinningTestCase cpu_pinning_macos_mock_set_true = { + true, // param[in]: simulated settting for cpu pinning property + true, // param[in]: simulated settting for user changing cpu pinning property + {{40, 20, 0, 20, 0, 0}}, // param[in]: simulated setting for current proc_type_table + {{1, MAIN_CORE_PROC, 20, 0, 0}}, // param[in]: simulated setting for current streams_info_table + false, // param[expected out]: simulated setting for expected output +}; +CpuPinningTestCase cpu_pinning_macos_mock_set_false = { + false, + true, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_macos_mock_set_default = { + true, + false, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_win_mock_set_true = { + true, + true, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + true, +}; +CpuPinningTestCase cpu_pinning_win_mock_set_true_2 = { + true, + true, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_win_mock_set_false = { + false, + true, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_win_mock_set_false_2 = { + false, + true, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_win_mock_set_default = { + true, + false, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_win_mock_set_default_2 = { + true, + false, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_linux_mock_set_true = { + true, + true, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + true, +}; +CpuPinningTestCase cpu_pinning_linux_mock_set_true_2 = { + true, + true, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}}, + true, +}; +CpuPinningTestCase cpu_pinning_linux_mock_set_false = { + false, + true, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_linux_mock_set_false_2 = { + false, + true, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_linux_mock_set_default = { + false, + false, + {{40, 20, 0, 20, 0, 0}}, + {{1, MAIN_CORE_PROC, 20, 0, 0}}, + true, +}; +CpuPinningTestCase cpu_pinning_linux_mock_set_default_2 = { + true, + false, + {{20, 6, 8, 6, 0, 0}}, + {{1, ALL_PROC, 14, 0, 0}, {0, MAIN_CORE_PROC, 6, 0, 0}, {0, EFFICIENT_CORE_PROC, 8, 0, 0}}, + false, +}; +CpuPinningTestCase cpu_pinning_linux_mock_set_default_3 = { + false, + false, + {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, + {{1, MAIN_CORE_PROC, 24, 0, 0}}, + true, +}; + +#if defined(__linux__) +INSTANTIATE_TEST_SUITE_P(smoke_CpuPinning, + CpuPinningTests, + ::testing::Values(cpu_pinning_linux_mock_set_true, + cpu_pinning_linux_mock_set_true_2, + cpu_pinning_linux_mock_set_false, + cpu_pinning_linux_mock_set_false_2, + cpu_pinning_linux_mock_set_default, + cpu_pinning_linux_mock_set_default_2, + cpu_pinning_linux_mock_set_default_3)); +#elif defined(_WIN32) +INSTANTIATE_TEST_SUITE_P(smoke_CpuPinning, + CpuPinningTests, + ::testing::Values(cpu_pinning_win_mock_set_true, + cpu_pinning_win_mock_set_true_2, + cpu_pinning_win_mock_set_false, + cpu_pinning_win_mock_set_false_2, + cpu_pinning_win_mock_set_default, + cpu_pinning_win_mock_set_default_2)); +#else +INSTANTIATE_TEST_SUITE_P(smoke_CpuPinning, + CpuPinningTests, + ::testing::Values(cpu_pinning_macos_mock_set_true, + cpu_pinning_macos_mock_set_false, + cpu_pinning_macos_mock_set_default)); +#endif +} // namespace \ No newline at end of file