From 4e27715cb6fb69cab8d9b82c5533f7b29042586b Mon Sep 17 00:00:00 2001
From: "Wang, Yang"
Date: Fri, 20 Dec 2024 12:21:01 +0800
Subject: [PATCH] update.

---
 src/plugins/auto/src/schedule.cpp             |   5 +-
 .../auto/tests/unit/dynamic_output_test.cpp   |  28 ++-
 .../auto/tests/unit/runtime_fallback_test.cpp | 176 ++++++++++++++----
 3 files changed, 161 insertions(+), 48 deletions(-)

diff --git a/src/plugins/auto/src/schedule.cpp b/src/plugins/auto/src/schedule.cpp
index 2fbb81e34647fb..dc9961752f18c3 100644
--- a/src/plugins/auto/src/schedule.cpp
+++ b/src/plugins/auto/src/schedule.cpp
@@ -89,10 +89,7 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel
         (m_context->m_device_priorities.end() == it_numrequests || it_numrequests->num_requests_per_devices == -1)
             ? optimal_num
             : it_numrequests->num_requests_per_devices;
-    num_requests =
-        (num_requests == 1 && m_context->m_performance_hint != ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)
-            ? 2
-            : num_requests;
+    num_requests = (num_requests == 1) ? 2 : num_requests;
     auto& worker_requests = m_worker_requests[device];
     auto& idle_worker_requests = m_idle_worker_requests[device];
     worker_requests.resize(num_requests);
diff --git a/src/plugins/auto/tests/unit/dynamic_output_test.cpp b/src/plugins/auto/tests/unit/dynamic_output_test.cpp
index c0902b38ce5d46..ba7febe07d049e 100644
--- a/src/plugins/auto/tests/unit/dynamic_output_test.cpp
+++ b/src/plugins/auto/tests/unit/dynamic_output_test.cpp
@@ -7,7 +7,6 @@
 #include "include/auto_unit_test.hpp"
 #include "openvino/runtime/threading/immediate_executor.hpp"
-
 using DynamicOutputConfigParams = std::tuple;
@@ -21,14 +20,18 @@ class DynamicOutputInferenceTest : public tests::AutoTest, public ::testing::Tes
         mockExecutor.reset();
         mockExecutorActual.reset();
         mockInferrequest.reset();
+        mockInferrequest_2.reset();
         mockInferrequestActual.reset();
+        mockInferrequestActual_2.reset();
     }

protected:
     ov::Any priorityList;
     ov::Any targetList;
     std::shared_ptr mockInferrequest;
+    std::shared_ptr mockInferrequest_2;
     std::shared_ptr mockInferrequestActual;
+    std::shared_ptr mockInferrequestActual_2;
     std::shared_ptr mockExecutor;
     std::shared_ptr mockExecutorActual;
 };
@@ -53,10 +56,22 @@ void DynamicOutputInferenceTest::SetUp() {
     mockExecutorActual = std::make_shared();
     mockInferrequest =
         std::make_shared(inferReqInternal, mockExecutor, nullptr, false);
+    // will be at least 2 infer requests for mocked CPU/GPU
+    auto inferReqInternal_2 = std::make_shared(mockIExeNet);
+    mockInferrequest_2 =
+        std::make_shared(inferReqInternal_2, mockExecutor, nullptr, false);
+
+    auto inferReqInternalActual_2 = std::make_shared(mockIExeNetActual);
+
     mockInferrequestActual = std::make_shared(inferReqInternalActual,
                                               mockExecutorActual,
                                               nullptr,
                                               false);
+    mockInferrequestActual_2 = std::make_shared(inferReqInternalActual_2,
+                                                mockExecutorActual,
+                                                nullptr,
+                                                false);
+
     std::tie(priorityList, targetList) = GetParam();
     auto targets = targetList.as>();
     ON_CALL(*core, get_available_devices()).WillByDefault(Return(targets));
@@ -103,11 +118,12 @@ TEST_P(DynamicOutputInferenceTest, CanInferWithOutputChangedFromDynamicOnAutoToS
         auto tensor = inferReqInternal->get_tensor(it);
         tensor->set_shape(ov::Shape{2, 3});
     }
-    ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault(Return(mockInferrequest));
-    ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() {
-        std::this_thread::sleep_for(std::chrono::milliseconds(0));
-        return mockInferrequestActual;
-    }));
+    EXPECT_CALL(*mockIExeNet.get(), create_infer_request())
+        .WillOnce(Return(mockInferrequest))
+        .WillOnce(Return(mockInferrequest_2));
+    EXPECT_CALL(*mockIExeNetActual.get(), create_infer_request())
+        .WillOnce(Return(mockInferrequestActual))
+        .WillOnce(Return(mockInferrequestActual_2));
     config.insert(ov::device::priorities(priorityList.as()));
     config.insert(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
     std::shared_ptr exeNetwork;
diff --git a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp
index 6b97802f872ee4..12903b7dfed5e5 100644
--- a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp
+++ b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp
@@ -164,6 +164,11 @@ TEST_P(AutoRuntimeFallback, releaseResource) {
                 _))
             .WillByDefault(ov::Throw("compile model error"));
     }
+    std::map>> inferRequests;
+    inferRequests["CPU"] = {};
+    inferRequests["GPU.0"] = {};
+    inferRequests["GPU.1"] = {};
+    inferRequests["OTHER"] = {};
     for (auto& deviceInfo : targetDevices) {
         std::string deviceName;
         bool ifThrow;
@@ -171,23 +176,47 @@ TEST_P(AutoRuntimeFallback, releaseResource) {
         targetDev += deviceName;
         targetDev += ((deviceInfo == targetDevices.back()) ? "" : ",");
         if (deviceName == "CPU") {
-            ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this, ifThrow]() {
-                auto inferRequest = std::make_shared(inferReqInternal,
-                                                     mockExecutor,
-                                                     nullptr,
-                                                     ifThrow);
-                return inferRequest;
+            auto inferReqInternal_CPU_2 = std::make_shared(mockIExeNet);
+            auto inferRequest_2 = std::make_shared(inferReqInternal_CPU_2,
+                                                   mockExecutor,
+                                                   nullptr,
+                                                   ifThrow);
+            auto inferRequest = std::make_shared(inferReqInternal,
+                                                 mockExecutor,
+                                                 nullptr,
+                                                 ifThrow);
+            inferRequests[deviceName].push_back(inferRequest);
+            inferRequests[deviceName].push_back(inferRequest_2);
+            ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this, &inferRequests, deviceName]() {
+                auto infer = inferRequests.at(deviceName).back();
+                if (inferRequests.at(deviceName).size() > 1) {
+                    // in case of passthrough model, we need to keep the infer request
+                    inferRequests.at(deviceName).pop_back();
+                }
+                return infer;
             });
         } else if (deviceName == "GPU.0") {
+            auto inferReqInternal_GPU_0_2 =
+                std::make_shared(mockIExeNetActual);
+            auto inferRequest_2 =
+                std::make_shared(inferReqInternal_GPU_0_2,
+                                 mockExecutorGPU_0,
+                                 nullptr,
+                                 ifThrow);
+            auto inferRequest = std::make_shared(inferReqInternalActual,
+                                                 mockExecutorGPU_0,
+                                                 nullptr,
+                                                 ifThrow);
+            inferRequests[deviceName].push_back(inferRequest);
+            inferRequests[deviceName].push_back(inferRequest_2);
             ON_CALL(*mockIExeNetActual.get(), create_infer_request())
-                .WillByDefault(InvokeWithoutArgs([this, ifThrow]() {
+                .WillByDefault(InvokeWithoutArgs([this, &inferRequests, deviceName]() {
                     std::this_thread::sleep_for(std::chrono::milliseconds(0));
-                    auto inferRequest =
-                        std::make_shared(inferReqInternalActual,
-                                         mockExecutorGPU_0,
-                                         nullptr,
-                                         ifThrow);
-                    return inferRequest;
+                    auto infer = inferRequests.at(deviceName).back();
+                    if (inferRequests.at(deviceName).size() > 1) {
+                        inferRequests.at(deviceName).pop_back();
+                    }
+                    return infer;
                 }));
         } else if (deviceName == "GPU.1") {
             if (generateWorkersFail) {
@@ -198,26 +227,52 @@ TEST_P(AutoRuntimeFallback, releaseResource) {
                                                             ifThrow);
                 ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(ov::Throw("error"));
             } else {
+                auto inferRequest = std::make_shared(inferReqInternalGPU_1,
+                                                     mockExecutorGPU_1,
+                                                     nullptr,
+                                                     ifThrow);
+                auto inferReqInternalGPU_1_2 =
+                    std::make_shared(mockIExeNetGPU_1);
+                auto inferRequest_2 =
+                    std::make_shared(inferReqInternalGPU_1_2,
+                                     mockExecutorGPU_1,
+                                     nullptr,
+                                     ifThrow);
+                inferRequests[deviceName].push_back(inferRequest);
+                inferRequests[deviceName].push_back(inferRequest_2);
                 ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request())
-                    .WillByDefault(InvokeWithoutArgs([this, ifThrow]() {
+                    .WillByDefault(InvokeWithoutArgs([this, &inferRequests, deviceName]() {
                         std::this_thread::sleep_for(std::chrono::milliseconds(0));
-                        auto inferRequest =
-                            std::make_shared(inferReqInternalGPU_1,
-                                             mockExecutorGPU_1,
-                                             nullptr,
-                                             ifThrow);
-                        return inferRequest;
+                        auto infer = inferRequests.at(deviceName).back();
+                        if (inferRequests.at(deviceName).size() > 1) {
+                            inferRequests.at(deviceName).pop_back();
+                        }
+                        return infer;
                     }));
             }
         } else if (deviceName == "OTHER") {
-            ON_CALL(*mockIExeNetOTHER.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this, ifThrow]() {
-                std::this_thread::sleep_for(std::chrono::milliseconds(0));
-                auto inferRequest = std::make_shared(inferReqInternalOTHER,
-                                                     mockExecutorOTHER,
-                                                     nullptr,
-                                                     ifThrow);
-                return inferRequest;
-            }));
+            auto inferRequest = std::make_shared(inferReqInternalOTHER,
+                                                 mockExecutorOTHER,
+                                                 nullptr,
+                                                 ifThrow);
+            auto inferReqInternalOTHER_2 =
+                std::make_shared(mockIExeNetOTHER);
+            std::this_thread::sleep_for(std::chrono::milliseconds(0));
+            auto inferRequest_2 = std::make_shared(inferReqInternalOTHER_2,
+                                                   mockExecutorOTHER,
+                                                   nullptr,
+                                                   ifThrow);
+            inferRequests[deviceName].push_back(inferRequest);
+            inferRequests[deviceName].push_back(inferRequest_2);
+            ON_CALL(*mockIExeNetOTHER.get(), create_infer_request())
+                .WillByDefault(InvokeWithoutArgs([this, &inferRequests, deviceName]() {
+                    std::this_thread::sleep_for(std::chrono::milliseconds(0));
+                    auto infer = inferRequests.at(deviceName).back();
+                    if (inferRequests.at(deviceName).size() > 1) {
+                        inferRequests.at(deviceName).pop_back();
+                    }
+                    return infer;
+                }));
         } else {
             return;
         }
@@ -321,6 +376,11 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) {
                 _))
             .WillByDefault(ov::Throw("compile model error"));
     }
+    std::map>> inferRequests;
+    inferRequests["CPU"] = {};
+    inferRequests["GPU.0"] = {};
+    inferRequests["GPU.1"] = {};
+    inferRequests["OTHER"] = {};
     for (auto& deviceInfo : targetDevices) {
         std::string deviceName;
         bool ifThrow;
@@ -332,8 +392,20 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) {
                                                            mockExecutor,
                                                            nullptr,
                                                            ifThrow);
-            ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() {
-                return mockInferrequest;
+            auto inferReqInternal_CPU_2 = std::make_shared(mockIExeNet);
+            auto inferRequest_2 = std::make_shared(inferReqInternal_CPU_2,
+                                                   mockExecutor,
+                                                   nullptr,
+                                                   ifThrow);
+            inferRequests[deviceName].push_back(mockInferrequest);
+            inferRequests[deviceName].push_back(inferRequest_2);
+            ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this, &inferRequests, deviceName]() {
+                auto infer = inferRequests.at(deviceName).back();
+                if (inferRequests.at(deviceName).size() > 1) {
+                    // in case of passthrough model, we need to keep the infer request
+                    inferRequests.at(deviceName).pop_back();
+                }
+                return infer;
             });
         } else if (deviceName == "GPU.0") {
             mockInferrequestGPU_0 = std::make_shared(inferReqInternalActual,
                                                      mockExecutorGPU_0,
                                                      nullptr,
                                                      ifThrow);
-            ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() {
-                std::this_thread::sleep_for(std::chrono::milliseconds(0));
-                return mockInferrequestGPU_0;
-            }));
+            auto inferReqInternal_GPU_0_2 =
+                std::make_shared(mockIExeNetActual);
+            auto inferRequest_2 =
+                std::make_shared(inferReqInternal_GPU_0_2,
+                                 mockExecutorGPU_0,
+                                 nullptr,
+                                 ifThrow);
+            inferRequests[deviceName].push_back(mockInferrequestGPU_0);
+            inferRequests[deviceName].push_back(inferRequest_2);
+            ON_CALL(*mockIExeNetActual.get(), create_infer_request())
+                .WillByDefault(InvokeWithoutArgs([this, &inferRequests, deviceName]() {
+                    std::this_thread::sleep_for(std::chrono::milliseconds(0));
+                    auto infer = inferRequests.at(deviceName).back();
+                    if (inferRequests.at(deviceName).size() > 1) {
+                        inferRequests.at(deviceName).pop_back();
+                    }
+                    return infer;
+                }));
         } else if (deviceName == "GPU.1") {
             if (generateWorkersFail) {
                 mockInferrequestGPU_1 = std::make_shared(inferReqInternalGPU_1,
                                                          mockExecutorGPU_1,
                                                          nullptr,
                                                          ifThrow);
                 ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(ov::Throw("error"));
             } else {
                 mockInferrequestGPU_1 = std::make_shared(inferReqInternalGPU_1,
                                                          mockExecutorGPU_1,
                                                          nullptr,
                                                          ifThrow);
-                ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() {
-                    std::this_thread::sleep_for(std::chrono::milliseconds(0));
-                    return mockInferrequestGPU_1;
-                }));
+                auto inferReqInternalGPU_1_2 =
+                    std::make_shared(mockIExeNetGPU_1);
+                auto inferRequest_2 =
+                    std::make_shared(inferReqInternalGPU_1_2,
+                                     mockExecutorGPU_1,
+                                     nullptr,
+                                     ifThrow);
+                inferRequests[deviceName].push_back(mockInferrequestGPU_1);
+                inferRequests[deviceName].push_back(inferRequest_2);
+                ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request())
+                    .WillByDefault(InvokeWithoutArgs([this, &inferRequests, deviceName]() {
+                        std::this_thread::sleep_for(std::chrono::milliseconds(0));
+                        auto infer = inferRequests.at(deviceName).back();
+                        if (inferRequests.at(deviceName).size() > 1) {
+                            inferRequests.at(deviceName).pop_back();
+                        }
+                        return infer;
+                    }));
             }
         }
     }
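Note (illustration only, not part of the patch): a minimal GoogleMock sketch of the pattern the updated tests rely on. Since the scheduler now always bumps num_requests from 1 to 2, each mocked compiled model must be able to hand out at least two infer-request mocks; the tests do this with a per-device pool consumed by create_infer_request(). The FakeCompiledModel and FakeInferRequest types below are hypothetical stand-ins, not the OpenVINO mock fixtures; link against gtest_main to run it.

// Standalone sketch under the assumptions above: a per-device pool of pre-built
// mock infer requests served by create_infer_request(), with the last mock kept
// so that any extra call still gets a valid object.
#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <map>
#include <memory>
#include <string>
#include <vector>

struct FakeInferRequest {
    explicit FakeInferRequest(int id) : id(id) {}
    int id;
};

struct FakeCompiledModel {
    MOCK_METHOD(std::shared_ptr<FakeInferRequest>, create_infer_request, ());
};

TEST(WorkerPoolSketch, ServesTwoDistinctRequestsPerDevice) {
    ::testing::NiceMock<FakeCompiledModel> model;

    // Two pre-built requests per device, mirroring the inferRequests map in the patch.
    std::map<std::string, std::vector<std::shared_ptr<FakeInferRequest>>> inferRequests;
    inferRequests["CPU"] = {std::make_shared<FakeInferRequest>(1), std::make_shared<FakeInferRequest>(2)};

    const std::string deviceName = "CPU";
    ON_CALL(model, create_infer_request()).WillByDefault([&inferRequests, deviceName]() {
        // Hand out the last element; keep one behind so later calls still get a valid mock.
        auto infer = inferRequests.at(deviceName).back();
        if (inferRequests.at(deviceName).size() > 1) {
            inferRequests.at(deviceName).pop_back();
        }
        return infer;
    });

    // The first two workers get distinct mocks; a third call reuses the remaining one.
    auto first = model.create_infer_request();
    auto second = model.create_infer_request();
    EXPECT_NE(first, second);
    EXPECT_EQ(second, model.create_infer_request());
}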