diff --git a/src/plugins/auto/src/schedule.cpp b/src/plugins/auto/src/schedule.cpp index f52a8327992e26..abfd460d42118b 100644 --- a/src/plugins/auto/src/schedule.cpp +++ b/src/plugins/auto/src/schedule.cpp @@ -85,8 +85,11 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel OPENVINO_THROW("Every device used with AUTO should support query optimal_number_of_infer_requests property from compiled model ", iie.what()); } - const auto num_requests = (m_context->m_device_priorities.end() == it_numrequests || - it_numrequests->num_requests_per_devices == -1) ? optimal_num : it_numrequests->num_requests_per_devices; + auto num_requests = + (m_context->m_device_priorities.end() == it_numrequests || it_numrequests->num_requests_per_devices == -1) + ? optimal_num + : it_numrequests->num_requests_per_devices; + num_requests = (num_requests == 1) ? 2 : num_requests; auto& worker_requests = m_worker_requests[device]; auto& idle_worker_requests = m_idle_worker_requests[device]; worker_requests.resize(num_requests); diff --git a/src/plugins/auto/tests/unit/dynamic_output_test.cpp b/src/plugins/auto/tests/unit/dynamic_output_test.cpp index d7c1fecbdb905f..8c9d4820b892cb 100644 --- a/src/plugins/auto/tests/unit/dynamic_output_test.cpp +++ b/src/plugins/auto/tests/unit/dynamic_output_test.cpp @@ -7,7 +7,6 @@ #include "include/auto_unit_test.hpp" #include "openvino/runtime/threading/immediate_executor.hpp" - using DynamicOutputConfigParams = std::tuple; @@ -21,14 +20,18 @@ class DynamicOutputInferenceTest : public tests::AutoTest, public ::testing::Tes mockExecutor.reset(); mockExecutorActual.reset(); mockInferrequest.reset(); + mockInferrequest_2.reset(); mockInferrequestActual.reset(); + mockInferrequestActual_2.reset(); } protected: ov::Any priorityList; ov::Any targetList; std::shared_ptr mockInferrequest; + std::shared_ptr mockInferrequest_2; std::shared_ptr mockInferrequestActual; + std::shared_ptr mockInferrequestActual_2; std::shared_ptr mockExecutor; std::shared_ptr mockExecutorActual; }; @@ -53,10 +56,22 @@ void DynamicOutputInferenceTest::SetUp() { mockExecutorActual = std::make_shared(); mockInferrequest = std::make_shared(inferReqInternal, mockExecutor, nullptr, false); + // will be at least 2 infer requests for mocked CPU/GPU + auto inferReqInternal_2 = std::make_shared(mockIExeNet); + mockInferrequest_2 = + std::make_shared(inferReqInternal_2, mockExecutor, nullptr, false); + + auto inferReqInternalActual_2 = std::make_shared(mockIExeNetActual); + mockInferrequestActual = std::make_shared(inferReqInternalActual, mockExecutorActual, nullptr, false); + mockInferrequestActual_2 = std::make_shared(inferReqInternalActual_2, + mockExecutorActual, + nullptr, + false); + std::tie(priorityList, targetList) = GetParam(); auto targets = targetList.as>(); ON_CALL(*core, get_available_devices()).WillByDefault(Return(targets)); @@ -103,11 +118,12 @@ TEST_P(DynamicOutputInferenceTest, CanInferWithOutputChangedFromDynamicOnAutoToS auto tensor = inferReqInternal->get_tensor(it); tensor->set_shape(ov::Shape{2, 3}); } - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault(Return(mockInferrequest)); - ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestActual; - })); + EXPECT_CALL(*mockIExeNet.get(), create_infer_request()) + .WillOnce(Return(mockInferrequest)) + .WillOnce(Return(mockInferrequest_2)); + EXPECT_CALL(*mockIExeNetActual.get(), create_infer_request()) + .WillOnce(Return(mockInferrequestActual)) + .WillOnce(Return(mockInferrequestActual_2)); config.insert(ov::device::priorities(priorityList.as())); config.insert(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); std::shared_ptr exeNetwork; diff --git a/src/plugins/auto/tests/unit/release_helper_test.cpp b/src/plugins/auto/tests/unit/release_helper_test.cpp index b1631409090900..507127f036e47d 100644 --- a/src/plugins/auto/tests/unit/release_helper_test.cpp +++ b/src/plugins/auto/tests/unit/release_helper_test.cpp @@ -157,7 +157,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) { bool cpuSuccess; bool accSuccess; std::tie(cpuSuccess, accSuccess) = this->GetParam(); - size_t decreaseCount = 0; + size_t decreaseExeNetworkCount = 0; + size_t decreaseInferReqCount = 0; // test auto plugin plugin->set_device_name("AUTO"); const std::string strDevices = ov::test::utils::DEVICE_GPU + std::string(",") + ov::test::utils::DEVICE_CPU; @@ -188,8 +189,11 @@ TEST_P(AutoReleaseHelperTest, releaseResource) { ::testing::Matcher(StrEq(ov::test::utils::DEVICE_CPU)), _)) .WillByDefault(Return(mockExeNetwork)); - if (accSuccess) - decreaseCount++; + if (accSuccess) { + decreaseExeNetworkCount++; + // will be at least 2 infer requests for mocked CPU/GPU + decreaseInferReqCount += 2; + } } else { ON_CALL(*core, compile_model(::testing::Matcher&>(_), @@ -224,8 +228,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) { auto sharedcount = mockExeNetwork._ptr.use_count(); auto requestsharedcount = inferReqInternal.use_count(); std::this_thread::sleep_for(std::chrono::milliseconds(500)); - EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseCount); - EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseCount); + EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseExeNetworkCount); + EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseInferReqCount); if (cpuSuccess || accSuccess) { if (accSuccess) EXPECT_EQ(exeNetwork->get_property(ov::execution_devices.name()).as(), diff --git a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp index 58deda3b5cd719..113b933c89430e 100644 --- a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp +++ b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp @@ -164,6 +164,11 @@ TEST_P(AutoRuntimeFallback, releaseResource) { _)) .WillByDefault(ov::Throw("compile model error")); } + std::map>> inferRequests; + inferRequests["CPU"] = {}; + inferRequests["GPU.0"] = {}; + inferRequests["GPU.1"] = {}; + inferRequests["OTHER"] = {}; for (auto& deviceInfo : targetDevices) { std::string deviceName; bool ifThrow; @@ -171,23 +176,48 @@ TEST_P(AutoRuntimeFallback, releaseResource) { targetDev += deviceName; targetDev += ((deviceInfo == targetDevices.back()) ? "" : ","); if (deviceName == "CPU") { - mockInferrequest = std::make_shared(inferReqInternal, - mockExecutor, - nullptr, - ifThrow); - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() { - return mockInferrequest; + auto inferReqInternal_CPU_2 = std::make_shared(mockIExeNet); + auto inferRequest_2 = std::make_shared(inferReqInternal_CPU_2, + mockExecutor, + nullptr, + ifThrow); + auto inferRequest = std::make_shared(inferReqInternal, + mockExecutor, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([&inferRequests, deviceName]() { + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + // in case of passthrough model, we need to keep the infer request + inferRequests.at(deviceName).pop_back(); + } + return infer; }); } else if (deviceName == "GPU.0") { - mockInferrequestGPU_0 = - std::make_shared(inferReqInternalActual, + auto inferReqInternal_GPU_0_2 = + std::make_shared(mockIExeNetActual); + auto inferRequest_2 = + std::make_shared(inferReqInternal_GPU_0_2, mockExecutorGPU_0, nullptr, ifThrow); - ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestGPU_0; - })); + auto inferRequest = std::make_shared(inferReqInternalActual, + mockExecutorGPU_0, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetActual.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } else if (deviceName == "GPU.1") { if (generateWorkersFail) { mockInferrequestGPU_1 = @@ -197,25 +227,52 @@ TEST_P(AutoRuntimeFallback, releaseResource) { ifThrow); ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(ov::Throw("error")); } else { - mockInferrequestGPU_1 = - std::make_shared(inferReqInternalGPU_1, + auto inferRequest = std::make_shared(inferReqInternalGPU_1, + mockExecutorGPU_1, + nullptr, + ifThrow); + auto inferReqInternalGPU_1_2 = + std::make_shared(mockIExeNetGPU_1); + auto inferRequest_2 = + std::make_shared(inferReqInternalGPU_1_2, mockExecutorGPU_1, nullptr, ifThrow); - ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestGPU_1; - })); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } } else if (deviceName == "OTHER") { - mockInferrequestOTHER = std::make_shared(inferReqInternalOTHER, - mockExecutorOTHER, - nullptr, - ifThrow); - ON_CALL(*mockIExeNetOTHER.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestOTHER; - })); + auto inferRequest = std::make_shared(inferReqInternalOTHER, + mockExecutorOTHER, + nullptr, + ifThrow); + auto inferReqInternalOTHER_2 = + std::make_shared(mockIExeNetOTHER); + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto inferRequest_2 = std::make_shared(inferReqInternalOTHER_2, + mockExecutorOTHER, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetOTHER.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } else { return; } @@ -319,6 +376,11 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { _)) .WillByDefault(ov::Throw("compile model error")); } + std::map>> inferRequests; + inferRequests["CPU"] = {}; + inferRequests["GPU.0"] = {}; + inferRequests["GPU.1"] = {}; + inferRequests["OTHER"] = {}; for (auto& deviceInfo : targetDevices) { std::string deviceName; bool ifThrow; @@ -330,8 +392,20 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { mockExecutor, nullptr, ifThrow); - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() { - return mockInferrequest; + auto inferReqInternal_CPU_2 = std::make_shared(mockIExeNet); + auto inferRequest_2 = std::make_shared(inferReqInternal_CPU_2, + mockExecutor, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(mockInferrequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([&inferRequests, deviceName]() { + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + // in case of passthrough model, we need to keep the infer request + inferRequests.at(deviceName).pop_back(); + } + return infer; }); } else if (deviceName == "GPU.0") { mockInferrequestGPU_0 = @@ -339,10 +413,24 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { mockExecutorGPU_0, nullptr, ifThrow); - ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestGPU_0; - })); + auto inferReqInternal_GPU_0_2 = + std::make_shared(mockIExeNetActual); + auto inferRequest_2 = + std::make_shared(inferReqInternal_GPU_0_2, + mockExecutorGPU_0, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(mockInferrequestGPU_0); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetActual.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } else if (deviceName == "GPU.1") { if (generateWorkersFail) { mockInferrequestGPU_1 = @@ -357,10 +445,24 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { mockExecutorGPU_1, nullptr, ifThrow); - ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestGPU_1; - })); + auto inferReqInternalGPU_1_2 = + std::make_shared(mockIExeNetGPU_1); + auto inferRequest_2 = + std::make_shared(inferReqInternalGPU_1_2, + mockExecutorGPU_1, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(mockInferrequestGPU_1); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } } }