diff --git a/src/plugins/intel_npu/src/al/include/npu.hpp b/src/plugins/intel_npu/src/al/include/npu.hpp index 925b80ca7734fe..5d46ae3ae2a4ac 100644 --- a/src/plugins/intel_npu/src/al/include/npu.hpp +++ b/src/plugins/intel_npu/src/al/include/npu.hpp @@ -92,6 +92,11 @@ class IDevice : public std::enable_shared_from_this { ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF, void* mem = nullptr); + virtual ov::SoPtr createHostTensor(std::shared_ptr context, + const ov::element::Type& element_type, + const ov::Shape& shape, + const Config& config); + protected: virtual ~IDevice() = default; }; diff --git a/src/plugins/intel_npu/src/al/src/npu.cpp b/src/plugins/intel_npu/src/al/src/npu.cpp index 3b8c670ffd3404..8da55475e9b4f7 100644 --- a/src/plugins/intel_npu/src/al/src/npu.cpp +++ b/src/plugins/intel_npu/src/al/src/npu.cpp @@ -81,4 +81,11 @@ ov::SoPtr IDevice::createRemoteTensor(std::shared_ptr IDevice::createHostTensor(std::shared_ptr, + const ov::element::Type&, + const ov::Shape&, + const Config&) { + OPENVINO_THROW("Create Host Tensor is not supported"); +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_device.hpp b/src/plugins/intel_npu/src/backend/include/zero_device.hpp index f198453b932d83..fc4ac58f7643c5 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_device.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_device.hpp @@ -47,6 +47,11 @@ class ZeroDevice : public IDevice { ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF, void* mem = nullptr) override; + ov::SoPtr createHostTensor(std::shared_ptr context, + const ov::element::Type& element_type, + const ov::Shape& shape, + const Config& config) override; + ZeroDevice& operator=(const ZeroDevice&) = delete; ZeroDevice(const ZeroDevice&) = delete; diff --git a/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp new file mode 100644 index 00000000000000..ce28bf572541bc --- /dev/null +++ b/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_npu/al/config/config.hpp" +#include "openvino/runtime/itensor.hpp" +#include "zero_init.hpp" +#include "zero_remote_tensor.hpp" + +namespace intel_npu { + +class ZeroHostTensor : public ov::ITensor { +public: + ZeroHostTensor(std::shared_ptr context, + std::shared_ptr init_structs, + const ov::element::Type element_type, + const ov::Shape& shape, + const Config& config); + + ~ZeroHostTensor() override = default; + + void* data(const ov::element::Type& element_type) const override; + const ov::element::Type& get_element_type() const override; + + const ov::Shape& get_shape() const override; + + const ov::Strides& get_strides() const override; + + void set_shape(ov::Shape new_shape) override; + + std::shared_ptr get_impl() const; + +private: + std::shared_ptr m_impl; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index 725a0e96c76f6e..cbf3a9466364be 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -53,8 +53,9 @@ class ZeroInferRequest final : public SyncInferRequest { * @brief Check the received remote tensor and copy it to the Level Zero tensor 
* @param tensor Reference to a tensor. * @param name Friendly name of the tensor. + * @param isParameter True if tensor is a parameter. */ - void set_remote_tensor_data(std::shared_ptr tensor, const std::string& name); + void set_remote_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter); void check_network_precision(const ov::element::Type_t precision) const override; void create_pipeline(); @@ -77,8 +78,7 @@ class ZeroInferRequest final : public SyncInferRequest { // specific operations on the plugin in this case. size_t _batchSize = DEFAULT_BATCH_SIZE; - bool _createPipeline = true; - bool _updateCommandList = false; + bool _pipelineIsCreated = false; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 78bca3718711e3..b8724dcdd53f73 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -16,7 +16,6 @@ struct TensorData { void* mem; size_t size; bool levelZeroTensorCreatedLocally = true; - bool changed = false; }; struct Pipeline { @@ -32,7 +31,7 @@ struct Pipeline { virtual void pull(size_t batch_index) = 0; virtual void reset(size_t batch_index) const = 0; - virtual void updateCommandList(std::unordered_map& tensors_data, size_t batch_size) = 0; + virtual void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) = 0; protected: zeroMemory::MemoryManagementUnit _deviceInputs; diff --git a/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp b/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp index 2b432619fff4f8..76cfce8fecfa26 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp @@ -87,7 +87,7 @@ class CommandList { void appendGraphInitialize(const ze_graph_handle_t& graph_handle) const; void appendGraphExecute(const ze_graph_handle_t& graph_handle, const ze_graph_profiling_query_handle_t& profiling_query_handle) const; - void updateMutableCommandList(const void* pNext = nullptr) const; + void updateMutableCommandList(uint32_t arg_index, const void* arg_value) const; void appendNpuTimestamp(uint64_t* timestamp_buff) const; void appendBarrier() const; void close() const; @@ -96,9 +96,6 @@ class CommandList { inline ze_command_list_handle_t handle() const { return _handle; } - uint64_t getCommandListId() const { - return _command_id; - } private: ze_command_list_handle_t _handle = nullptr; diff --git a/src/plugins/intel_npu/src/backend/src/zero_device.cpp b/src/plugins/intel_npu/src/backend/src/zero_device.cpp index 56ee453b7d77c2..595ce734b533e9 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_device.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_device.cpp @@ -9,6 +9,7 @@ #include "intel_npu/al/itt.hpp" #include "intel_npu/utils/zero/zero_api.hpp" #include "zero_executor.hpp" +#include "zero_host_tensor.hpp" #include "zero_infer_request.hpp" #include "zero_remote_tensor.hpp" #include "zero_utils.hpp" @@ -193,3 +194,10 @@ ov::SoPtr ZeroDevice::createRemoteTensor(std::shared_ptr(context, _initStructs, element_type, shape, config, tensor_type, mem_type, mem)}; }; + +ov::SoPtr ZeroDevice::createHostTensor(std::shared_ptr context, + const ov::element::Type& element_type, + const ov::Shape& shape, + const Config& config) { + return {std::make_shared(context, _initStructs, element_type, shape, config)}; +}; diff --git 
a/src/plugins/intel_npu/src/backend/src/zero_host_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_host_tensor.cpp new file mode 100644 index 00000000000000..e4ebe2c1d5a8ba --- /dev/null +++ b/src/plugins/intel_npu/src/backend/src/zero_host_tensor.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "zero_host_tensor.hpp" + +#include "openvino/runtime/intel_npu/remote_properties.hpp" + +namespace intel_npu { + +ZeroHostTensor::ZeroHostTensor(std::shared_ptr context, + std::shared_ptr init_structs, + const ov::element::Type element_type, + const ov::Shape& shape, + const Config& config) + : m_impl(std::make_shared(context, + init_structs, + element_type, + shape, + config, + ov::intel_npu::TensorType::BINDED, + ov::intel_npu::MemType::L0_INTERNAL_BUF)) {} + +void* ZeroHostTensor::data(const ov::element::Type&) const { + return m_impl->get_properties().find(ov::intel_npu::mem_handle.name())->second.as(); +} + +const ov::element::Type& ZeroHostTensor::get_element_type() const { + return m_impl->get_element_type(); +} + +const ov::Shape& ZeroHostTensor::get_shape() const { + return m_impl->get_shape(); +} + +const ov::Strides& ZeroHostTensor::get_strides() const { + return m_impl->get_strides(); +} + +void ZeroHostTensor::set_shape(ov::Shape new_shape) { + m_impl->set_shape(new_shape); +} + +std::shared_ptr ZeroHostTensor::get_impl() const { + return m_impl; +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 36738f32e9f6c3..773827a4864724 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -402,13 +402,26 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons if (setTensorData) { _tensorsData[name] = TensorData{_copyAllTensors.at(name)->data(), _copyAllTensors.at(name)->get_byte_size(), - levelZeroTensorCreatedLocally, - !_createPipeline}; - _updateCommandList = true; + levelZeroTensorCreatedLocally}; + + if (_pipelineIsCreated) { + _logger.debug("ZeroInferRequest::infer_async - update command list"); + + intel_npu::ZeroExecutor::ArgumentDescriptor desc; + if (isParameter) { + desc = _executor->inputs_desc_map().at(name); + } else { + desc = _executor->outputs_desc_map().at(name); + } + + _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize); + } } } -void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr tensor, const std::string& name) { +void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr tensor, + const std::string& name, + bool isParameter) { auto l0_context = reinterpret_cast( extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context)); if (_initStructs->getContext() != l0_context) { @@ -421,8 +434,20 @@ void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr } _copyAllTensors[name] = tensor; - _tensorsData[name] = TensorData{data, tensor->get_byte_size(), false, !_createPipeline}; - _updateCommandList = true; + _tensorsData[name] = TensorData{data, tensor->get_byte_size(), false}; + + if (_pipelineIsCreated) { + _logger.debug("ZeroInferRequest::infer_async - update command list"); + + intel_npu::ZeroExecutor::ArgumentDescriptor desc; + if (isParameter) { + desc = _executor->inputs_desc_map().at(name); + } else { + desc = _executor->outputs_desc_map().at(name); + } + + _pipeline->updateCommandList(_tensorsData[name], 
desc.idx, _batchSize); + } } void ZeroInferRequest::set_tensor(const ov::Output& port, const ov::SoPtr& tensor) { @@ -444,7 +469,9 @@ void ZeroInferRequest::set_tensor(const ov::Output& port, const ov::op::util::is_parameter(port.get_node())); } else { _logger.debug("ZeroInferRequest::set_tensor - set new remote tensor"); - set_remote_tensor_data(remoteTensor, port.get_node()->get_friendly_name()); + set_remote_tensor_data(remoteTensor, + port.get_node()->get_friendly_name(), + ov::op::util::is_parameter(port.get_node())); } } } @@ -489,23 +516,11 @@ void ZeroInferRequest::infer_async() { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "infer_async"); _executor->mutexLock(); - - if (_createPipeline) { + if (!_pipelineIsCreated) { create_pipeline(); - _createPipeline = false; - _updateCommandList = false; + _pipelineIsCreated = true; } - - if (_initStructs->getMutableCommandListVersion()) { - if (_updateCommandList) { - _logger.debug("ZeroInferRequest::infer_async - update command list"); - _pipeline->updateCommandList(_tensorsData, _batchSize); - - _updateCommandList = false; - } - } - _executor->mutexUnlock(); for (const std::string& name : _inputAndStateInputNames) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 3a4ea554d157ec..f98e84a34a0a46 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -143,7 +143,7 @@ struct DiscretePipeline final : public Pipeline { } }; - void updateCommandList(std::unordered_map&, size_t) override{}; + void updateCommandList(const TensorData&, uint32_t, size_t) override {} private: const Config _config; @@ -274,60 +274,11 @@ struct IntegratedPipeline final : public Pipeline { _logger.debug("IntegratedPipeline - rest() completed"); }; - void updateCommandList(std::unordered_map& tensors_data, size_t batch_size) override { - std::vector mutable_argument_desc; - int32_t changed_tensors = 0; - - for (const auto& desc : tensors_data) { - if (desc.second.changed == true) { - changed_tensors++; - } - } - - mutable_argument_desc.reserve(changed_tensors); - - auto set_mutable_desc = - [&](int32_t mutable_desc_index, uint64_t command_list_id, uint32_t arg_index, const void* arg_value) { - mutable_argument_desc.emplace_back(ze_mutable_graph_argument_exp_desc_t{ - ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC, - mutable_desc_index ? 
&mutable_argument_desc.at(mutable_desc_index - 1) : nullptr, - command_list_id, - arg_index, - arg_value}); - }; - + void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) override { for (size_t i = 0; i < batch_size; i++) { - int32_t mutable_argument_desc_index = -1; - - for (const auto& desc : _executor->inputs_desc_map()) { - TensorData& inputTensorData = tensors_data.at(desc.first); - - if (inputTensorData.changed == true) { - set_mutable_desc( - ++mutable_argument_desc_index, - _command_lists.at(i)->getCommandListId(), - desc.second.idx, - static_cast(inputTensorData.mem) + (i * inputTensorData.size) / batch_size); - - inputTensorData.changed = false; - } - } - - for (const auto& desc : _executor->outputs_desc_map()) { - TensorData& outputTensorData = tensors_data.at(desc.first); - - if (outputTensorData.changed == true) { - set_mutable_desc( - ++mutable_argument_desc_index, - _command_lists.at(i)->getCommandListId(), - desc.second.idx, - static_cast(outputTensorData.mem) + (i * outputTensorData.size) / batch_size); - - outputTensorData.changed = false; - } - } - - _command_lists.at(i)->updateMutableCommandList(&mutable_argument_desc.at(mutable_argument_desc_index)); + _command_lists.at(i)->updateMutableCommandList( + index, + static_cast(tensors_data.mem) + (i * tensors_data.size) / batch_size); _command_lists.at(i)->close(); } }; diff --git a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp index 77ebd858cc3e07..2cd249aad19a92 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp @@ -114,11 +114,16 @@ CommandList::~CommandList() { _log.error("zeCommandListDestroy failed %#X", uint64_t(result)); } } -void CommandList::updateMutableCommandList(const void* pNext) const { - ze_mutable_commands_exp_desc_t mutable_commands_exp_desc_t = { - static_cast(ZE_MUTABLE_COMMAND_EXP_FLAG_GRAPH_ARGUMENT), - pNext, - 0}; +void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_value) const { + ze_mutable_graph_argument_exp_desc_t desc = {ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC, + nullptr, + _command_id, + arg_index, + arg_value}; + + ze_mutable_commands_exp_desc_t mutable_commands_exp_desc_t = {ZE_STRUCTURE_TYPE_MUTABLE_COMMANDS_EXP_DESC, + &desc, + 0}; zeroUtils::throwOnFail("zeCommandListUpdateMutableCommandsExp", zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t)); diff --git a/src/plugins/intel_npu/src/plugin/include/remote_context.hpp b/src/plugins/intel_npu/src/plugin/include/remote_context.hpp index 398884dcb673ac..2fce44526c358e 100644 --- a/src/plugins/intel_npu/src/plugin/include/remote_context.hpp +++ b/src/plugins/intel_npu/src/plugin/include/remote_context.hpp @@ -43,6 +43,14 @@ class RemoteContextImpl : public ov::IRemoteContext { const ov::Shape& shape, const ov::AnyMap& params) override; + /** + * @brief This method is used to create a host tensor object friendly for the device in current context. + * @param type Tensor element type. + * @param shape Tensor shape. + * @return A tensor instance with device friendly memory. 
+ */ + ov::SoPtr create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override; + private: std::shared_ptr get_this_shared_ptr(); diff --git a/src/plugins/intel_npu/src/plugin/src/remote_context.cpp b/src/plugins/intel_npu/src/plugin/src/remote_context.cpp index 25683be31fe9e4..9539826f985147 100644 --- a/src/plugins/intel_npu/src/plugin/src/remote_context.cpp +++ b/src/plugins/intel_npu/src/plugin/src/remote_context.cpp @@ -84,6 +84,15 @@ ov::SoPtr RemoteContextImpl::create_tensor(const ov::element: mem_handle_object); } +ov::SoPtr RemoteContextImpl::create_host_tensor(const ov::element::Type type, const ov::Shape& shape) { + auto device = _backends->getDevice(_config.get()); + if (device == nullptr) { + OPENVINO_THROW("Device is not available"); + } + + return device->createHostTensor(get_this_shared_ptr(), type, shape, _config); +} + const std::string& RemoteContextImpl::get_device_name() const { return _device_name; } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp index 3de0dedd8d6878..6cb9e23d203c11 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp @@ -8,6 +8,8 @@ #include +#include "openvino/core/except.hpp" + #ifndef _WIN32 # define LIB_ZE_LOADER_SUFFIX ".1" #endif @@ -26,8 +28,6 @@ namespace intel_npu { symbol_statement(zeCommandListCreate) \ symbol_statement(zeCommandListDestroy) \ symbol_statement(zeCommandListReset) \ - symbol_statement(zeCommandListGetNextCommandIdExp) \ - symbol_statement(zeCommandListUpdateMutableCommandsExp) \ symbol_statement(zeCommandQueueCreate) \ symbol_statement(zeCommandQueueDestroy) \ symbol_statement(zeCommandQueueExecuteCommandLists) \ @@ -58,6 +58,11 @@ namespace intel_npu { symbol_statement(zeMemAllocHost) \ symbol_statement(zeMemFree) \ symbol_statement(zeMemGetAllocProperties) + +//unsupported symbols with older ze_loader versions +#define weak_symbols_list() \ + symbol_statement(zeCommandListGetNextCommandIdExp) \ + symbol_statement(zeCommandListUpdateMutableCommandsExp) // clang-format on class ZeroApi { @@ -73,6 +78,7 @@ class ZeroApi { } #define symbol_statement(symbol) decltype(&::symbol) symbol; symbols_list(); + weak_symbols_list(); #undef symbol_statement private: @@ -84,11 +90,17 @@ class ZeroApi { #define symbol_statement(symbol) \ template \ inline typename std::invoke_result::type wrapped_##symbol(Args... 
args) { \ - return ZeroApi::getInstance().symbol(std::forward(args)...); \ + auto& ref = ZeroApi::getInstance(); \ + if (ref.symbol == nullptr) { \ + OPENVINO_THROW("Unsupported symbol " #symbol); \ + } \ + return ref.symbol(std::forward(args)...); \ } symbols_list(); +weak_symbols_list(); #undef symbol_statement #define symbol_statement(symbol) inline decltype(&::symbol) symbol = wrapped_##symbol; symbols_list(); +weak_symbols_list(); #undef symbol_statement } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_api.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_api.cpp index fd3e128b3afc94..991e8d5f9f9e65 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_api.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_api.cpp @@ -4,7 +4,6 @@ #include "intel_npu/utils/zero/zero_api.hpp" -#include "openvino/core/except.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" @@ -29,14 +28,24 @@ ZeroApi::ZeroApi() { try { #define symbol_statement(symbol) \ this->symbol = reinterpret_cast(ov::util::get_symbol(lib, #symbol)); - symbols_list() + symbols_list(); #undef symbol_statement } catch (const std::runtime_error& error) { OPENVINO_THROW(error.what()); } +#define symbol_statement(symbol) \ + try { \ + this->symbol = reinterpret_cast(ov::util::get_symbol(lib, #symbol)); \ + } catch (const std::runtime_error&) { \ + this->symbol = nullptr; \ + } + weak_symbols_list(); +#undef symbol_statement + #define symbol_statement(symbol) symbol = this->symbol; symbols_list(); + weak_symbols_list(); #undef symbol_statement } diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 660eb875f72d38..6b7372223c6bea 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -473,6 +473,55 @@ TEST_P(BatchingRunTests, SetInputTensorInfer_Caching) { for (size_t i = 0; i < shape_size; ++i) { EXPECT_NEAR(actual[i], 6.f, 1e-5) << "Expected=6, actual=" << actual[i] << " for index " << i; } + + delete[] buffer; +} + +TEST_P(BatchingRunTests, CheckTwoRunsInfer) { + auto batch_shape = Shape{4, 2, 2, 2}; + auto shape_size = ov::shape_size(batch_shape); + auto model = createBatchingModel(element::f32, batch_shape, "N..."); + float* buffer = new float[shape_size]; + + auto context = core->get_default_context(target_device); + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + ov::Tensor tensor{element::f32, batch_shape, buffer}; + + inference_request.set_input_tensor(tensor); + auto actual_tensor = inference_request.get_output_tensor(0); + auto* actual = actual_tensor.data(); + auto* input_data = tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = 5.f; + } + inference_request.infer(); // Adds '1' to each element + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(actual[i], 6.f, 1e-5) << "Expected=6, actual=" << actual[i] << " for index " << i; + } + + auto l0_host_input_tensor = context.create_host_tensor(ov::element::f32, batch_shape); + auto l0_host_output_tensor = context.create_host_tensor(ov::element::f32, actual_tensor.get_shape()); + + auto* input_data_host_tensor = l0_host_input_tensor.data(); + input_data = reinterpret_cast(input_data_host_tensor); + for (size_t i = 0; i < 
shape_size; ++i) { + input_data[i] = 5.f; + } + inference_request.set_input_tensor(l0_host_input_tensor); + inference_request.set_output_tensor(l0_host_output_tensor); + inference_request.infer(); + + auto* actual_host_tensor = l0_host_output_tensor.data(); + actual = reinterpret_cast(actual_host_tensor); + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(actual[i], 6.f, 1e-5) << "Expected=6, actual=" << actual[i] << " for index " << i; + } + + delete[] buffer; } } // namespace behavior diff --git a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp index a6023e6e678d3d..a58da0253a9d74 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp @@ -128,7 +128,7 @@ TEST_P(RemoteRunTests, CheckRemoteTensorInternalBufChangingTensors) { // set output remote tensor auto remote_output_tensor = inference_request.get_output_tensor(); - auto output_remote_tensor = context.create_l0_host_tensor(ov::element::f32, remote_output_tensor.get_shape()); + auto output_remote_tensor = context.create_tensor(ov::element::f32, remote_output_tensor.get_shape()); remote_output_tensor = {}; OV_ASSERT_NO_THROW(inference_request.set_output_tensor(output_remote_tensor)); @@ -202,8 +202,7 @@ TEST_P(RemoteRunTests, CheckOutputDataFromTwoRunsInOutRemoteTensors1) { auto remote_input_tensor = context.create_l0_host_tensor(ov::element::f32, input_shape, ov::intel_npu::TensorType::INPUT); - remote_output_tensor = context.create_l0_host_tensor(ov::element::f32, output_shape) - .as(); + remote_output_tensor = context.create_l0_host_tensor(ov::element::f32, output_shape); memset(remote_input_tensor.get(), 99, byte_size); OV_ASSERT_NO_THROW(inference_request.set_input_tensor(remote_input_tensor)); @@ -305,8 +304,7 @@ TEST_P(RemoteRunTests, CheckOutputDataFromTwoRunsInOutRemoteTensors3) { auto remote_input_tensor = context.create_l0_host_tensor(ov::element::f32, input_shape, ov::intel_npu::TensorType::INPUT); - auto remote_output_tensor = - context.create_l0_host_tensor(ov::element::f32, output_shape).as(); + auto remote_output_tensor = context.create_l0_host_tensor(ov::element::f32, output_shape); memset(remote_input_tensor.get(), 99, byte_size); OV_ASSERT_NO_THROW(inference_request.set_input_tensor(remote_input_tensor)); @@ -318,6 +316,74 @@ TEST_P(RemoteRunTests, CheckOutputDataFromTwoRunsInOutRemoteTensors3) { EXPECT_EQ(memcmp(first_output.data(), second_output, first_output.get_byte_size()), 0); } +TEST_P(RemoteRunTests, CheckOutputDataFromTwoRunsInOutRemoteTensorsHostTensor1) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + ov::InferRequest inference_request; + ov::Tensor first_output; + + auto context = core->get_default_context(target_device).as(); + + OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, target_device, configuration)); + OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request()); + auto tensor = inference_request.get_input_tensor(); + memset(tensor.data(), 99, tensor.get_byte_size()); + OV_ASSERT_NO_THROW(inference_request.infer()); + first_output = inference_request.get_output_tensor(); + + auto l0_host_input_tensor = context.create_host_tensor(ov::element::f32, tensor.get_shape()); + auto l0_host_output_tensor = context.create_host_tensor(ov::element::f32, 
first_output.get_shape()); + + memset(l0_host_input_tensor.data(), 99, tensor.get_byte_size()); + OV_ASSERT_NO_THROW(inference_request.set_input_tensor(l0_host_input_tensor)); + OV_ASSERT_NO_THROW(inference_request.set_output_tensor(l0_host_output_tensor)); + OV_ASSERT_NO_THROW(inference_request.infer()); + + EXPECT_NE(first_output.data(), l0_host_output_tensor.data()); + EXPECT_EQ(memcmp(first_output.data(), l0_host_output_tensor.data(), first_output.get_byte_size()), 0); +} + +TEST_P(RemoteRunTests, CheckOutputDataFromTwoRunsInOutRemoteTensorsHostTensor2) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + ov::InferRequest inference_request; + + auto context = core->get_default_context(target_device).as(); + + OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model, target_device, configuration)); + OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request()); + auto input_tensor = inference_request.get_input_tensor(); + auto output_tensor = inference_request.get_output_tensor(); + const auto byte_size = input_tensor.get_byte_size(); + auto input_shape = input_tensor.get_shape(); + auto output_shape = output_tensor.get_shape(); + input_tensor = {}; + output_tensor = {}; + + auto remote_input_tensor = + context.create_l0_host_tensor(ov::element::f32, input_shape, ov::intel_npu::TensorType::INPUT); + auto remote_output_tensor = + context.create_l0_host_tensor(ov::element::f32, output_shape, ov::intel_npu::TensorType::INPUT); + memset(remote_input_tensor.get(), 1, byte_size); + OV_ASSERT_NO_THROW(inference_request.set_input_tensor(remote_input_tensor)); + OV_ASSERT_NO_THROW(inference_request.set_output_tensor(remote_output_tensor)); + OV_ASSERT_NO_THROW(inference_request.infer()); + + auto l0_host_input_tensor = context.create_host_tensor(ov::element::f32, input_shape); + auto l0_host_output_tensor = context.create_host_tensor(ov::element::f32, output_shape); + + memset(l0_host_input_tensor.data(), 99, byte_size); + OV_ASSERT_NO_THROW(inference_request.set_input_tensor(l0_host_input_tensor)); + OV_ASSERT_NO_THROW(inference_request.set_output_tensor(l0_host_output_tensor)); + OV_ASSERT_NO_THROW(inference_request.infer()); + + EXPECT_NE(remote_output_tensor.get(), l0_host_output_tensor.data()); + EXPECT_NE(memcmp(remote_output_tensor.get(), l0_host_output_tensor.data(), remote_output_tensor.get_byte_size()), + 0); +} + } // namespace behavior } // namespace test } // namespace ov
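
Usage note (not part of the patch): a minimal sketch, assuming a single-input/single-output model and a placeholder "model.xml" path, of how an application could exercise the new create_host_tensor() entry point introduced above (RemoteContextImpl::create_host_tensor -> IDevice::createHostTensor -> ZeroHostTensor backed by a Level Zero host buffer). It mirrors the CheckTwoRunsInfer and CheckOutputDataFromTwoRunsInOutRemoteTensorsHostTensor tests added in this diff; the OpenVINO public API calls used (ov::Core::get_default_context, ov::RemoteContext::create_host_tensor, set_input_tensor/set_output_tensor) already exist, only the NPU backing added here is new.

#include <algorithm>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;

    // Default NPU remote context; create_host_tensor() is routed by this patch to
    // ZeroDevice::createHostTensor(), which wraps a ZeroRemoteTensor allocated as an
    // L0_INTERNAL_BUF (zeMemAllocHost-backed) buffer.
    ov::RemoteContext context = core.get_default_context("NPU");

    // "model.xml" is an illustrative placeholder, not a file shipped with the patch.
    auto compiled = core.compile_model("model.xml", "NPU");
    auto request = compiled.create_infer_request();

    // Host tensors behave like ordinary ov::Tensor objects, but their storage is
    // device-friendly host memory, so binding them avoids the extra copy into the
    // plugin's internal Level Zero buffers.
    ov::Tensor host_input = context.create_host_tensor(ov::element::f32, compiled.input().get_shape());
    ov::Tensor host_output = context.create_host_tensor(ov::element::f32, compiled.output().get_shape());

    std::fill_n(host_input.data<float>(), host_input.get_size(), 5.f);

    request.set_input_tensor(host_input);
    request.set_output_tensor(host_output);

    // When the driver exposes mutable command lists, set_tensor() now patches only the
    // changed graph argument via Pipeline::updateCommandList(index, ...); otherwise the
    // request falls back to copying through its locally created tensors.
    request.infer();

    // Results are read directly from the bound host tensor; no explicit copy-back is needed.
    volatile float first_element = host_output.data<float>()[0];
    (void)first_element;
    return 0;
}

Design note: because updateCommandList() now takes a single TensorData plus its argument index, only the tensor that actually changed is re-patched (and only when the driver reports zeCommandListUpdateMutableCommandsExp / zeCommandListGetNextCommandIdExp, which are loaded as weak symbols), so swapping host or remote tensors between runs no longer forces a full command-list rebuild.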