Skip to content

Commit

Permalink
Do not allow setting a larger shape on a remote tensor
Browse files Browse the repository at this point in the history
Signed-off-by: Bogdan Pereanu <[email protected]>
  • Loading branch information
pereanub committed Dec 18, 2024
1 parent 55899af commit a14a25b
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 322 deletions.
12 changes: 7 additions & 5 deletions src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

namespace intel_npu {

// Bookkeeping attached to each level-zero I/O tensor of a ZeroInferRequest.
// Replaces the two parallel vectors previously used for the same purpose
// (tensorCreatedLocally flag and original memory id).
struct TensorInfo {
    // True when the L0 tensor was allocated by the plugin itself (via the
    // host-mem allocator) rather than supplied by the user.
    bool tensorCreatedLocally;
    // Memory id of the buffer last registered in the command list; compared
    // against the current id in infer_async() to detect reallocation and
    // trigger a command-list update.
    uint64_t originalMemoryId;
};

class ZeroInferRequest final : public SyncInferRequest {
public:
explicit ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
Expand Down Expand Up @@ -79,11 +84,8 @@ class ZeroInferRequest final : public SyncInferRequest {
mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
mutable std::vector<std::shared_ptr<ov::ITensor>> _levelZeroOutputTensors;

mutable std::vector<std::optional<bool>> _inputLevelZeroTensorCreatedLocally;
mutable std::vector<std::optional<bool>> _outputLevelZeroTensorCreatedLocally;

mutable std::vector<uint64_t> _originalMemoryIdInputLevelZeroTensor;
mutable std::vector<uint64_t> _originalMemoryIdOutputLevelZeroTensor;
mutable std::vector<TensorInfo> _levelZeroInputTensorInfo;
mutable std::vector<TensorInfo> _levelZeroOutputTensorInfo;

ze_device_properties_t _properties = {};
std::shared_ptr<const zeroMemory::HostMemAllocator> _inputAllocator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,6 @@ class ZeroRemoteTensor final : public RemoteTensor {
ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
void* mem = nullptr);

/**
* @brief Set new shape for tensor
* @note Memory allocation may happen
* @param shape A new shape
*/
void set_shape(ov::Shape shape) override;

~ZeroRemoteTensor() override;

private:
Expand Down
132 changes: 45 additions & 87 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
_logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
_levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
_levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
_inputLevelZeroTensorCreatedLocally(_metadata.inputs.size(), std::nullopt),
_outputLevelZeroTensorCreatedLocally(_metadata.outputs.size(), std::nullopt),
_originalMemoryIdInputLevelZeroTensor(_metadata.inputs.size(), 0),
_originalMemoryIdOutputLevelZeroTensor(_metadata.outputs.size(), 0),
_levelZeroInputTensorInfo(_metadata.inputs.size(), TensorInfo{false, 0}),
_levelZeroOutputTensorInfo(_metadata.outputs.size(), TensorInfo{false, 0}),
_profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
_profilingQuery(_initStructs, 0) {
_logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
Expand Down Expand Up @@ -198,7 +196,7 @@ void ZeroInferRequest::create_pipeline() {
INPUT,
*_inputAllocator,
_graph->get_batch_size());
_inputLevelZeroTensorCreatedLocally.at(inputIndex) = true;
_levelZeroInputTensorInfo.at(inputIndex).tensorCreatedLocally = true;
}

for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
Expand All @@ -214,7 +212,7 @@ void ZeroInferRequest::create_pipeline() {
OUTPUT,
*_outputAllocator,
_graph->get_batch_size());
_outputLevelZeroTensorCreatedLocally.at(outputIndex) = true;
_levelZeroOutputTensorInfo.at(outputIndex).tensorCreatedLocally = true;
}

if (_initStructs->getMutableCommandListVersion()) {
Expand All @@ -228,17 +226,12 @@ void ZeroInferRequest::create_pipeline() {
continue;
}

auto levelZeroRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(get_level_zero_input(inputIndex));
if (levelZeroRemoteTensor == nullptr) {
_originalMemoryIdInputLevelZeroTensor.at(inputIndex) =
get_memory_id(_initStructs->getContext(), get_level_zero_input(inputIndex)->data());

} else {
void* levelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);
_originalMemoryIdInputLevelZeroTensor.at(inputIndex) =
get_memory_id(_initStructs->getContext(), levelZeroBuffer);
if (std::dynamic_pointer_cast<ZeroRemoteTensor>(get_level_zero_input(inputIndex)) != nullptr) {
continue;
}

_levelZeroInputTensorInfo.at(inputIndex).originalMemoryId =
get_memory_id(_initStructs->getContext(), get_level_zero_input(inputIndex)->data());
}

for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
Expand All @@ -247,17 +240,12 @@ void ZeroInferRequest::create_pipeline() {
continue;
}

auto levelZeroRemoteTensor =
std::dynamic_pointer_cast<ZeroRemoteTensor>(_levelZeroOutputTensors.at(outputIndex));
if (levelZeroRemoteTensor == nullptr) {
_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) =
get_memory_id(_initStructs->getContext(), _levelZeroOutputTensors.at(outputIndex)->data());
} else {
void* levelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);
_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) =
get_memory_id(_initStructs->getContext(), levelZeroBuffer);
if (std::dynamic_pointer_cast<ZeroRemoteTensor>(_levelZeroOutputTensors.at(outputIndex)) != nullptr) {
continue;
}

_levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId =
get_memory_id(_initStructs->getContext(), _levelZeroOutputTensors.at(outputIndex)->data());
}
}

Expand Down Expand Up @@ -287,8 +275,8 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
const bool isInput) {
OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data");
auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
auto& tensorCreatedLocally =
isInput ? _inputLevelZeroTensorCreatedLocally.at(index) : _outputLevelZeroTensorCreatedLocally.at(index);
auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(index).tensorCreatedLocally
: _levelZeroOutputTensorInfo.at(index).tensorCreatedLocally;

bool setTensorData = false;
bool levelZeroTensorCreatedLocally = true;
Expand All @@ -304,7 +292,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
if (!setTensorData) {
// make sure that the L0 tensor was allocated locally and is not received from the user when receiving
// random tensor
if (tensorCreatedLocally.has_value() && !(*tensorCreatedLocally)) {
if (!tensorCreatedLocally) {
_logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor");
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor");

Expand All @@ -325,8 +313,8 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
if (_pipelineIsCreated) {
_logger.debug("ZeroInferRequest::infer_async - update command list");

auto& updateOriginalAddress = isInput ? _originalMemoryIdInputLevelZeroTensor.at(index)
: _originalMemoryIdOutputLevelZeroTensor.at(index);
auto& updateOriginalAddress = isInput ? _levelZeroInputTensorInfo.at(index).originalMemoryId
: _levelZeroOutputTensorInfo.at(index).originalMemoryId;

OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList");
_pipeline->updateCommandList(levelZeroTensors->data(),
Expand Down Expand Up @@ -356,25 +344,20 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTe
}

auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
auto& tensorCreatedLocally =
isInput ? _inputLevelZeroTensorCreatedLocally.at(index) : _outputLevelZeroTensorCreatedLocally.at(index);
auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(index).tensorCreatedLocally
: _levelZeroOutputTensorInfo.at(index).tensorCreatedLocally;

levelZeroTensors = tensor;
tensorCreatedLocally = false;

if (_pipelineIsCreated) {
_logger.debug("ZeroInferRequest::infer_async - update command list");

auto& updateOriginalAddress = isInput ? _originalMemoryIdInputLevelZeroTensor.at(index)
: _originalMemoryIdOutputLevelZeroTensor.at(index);

OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList");
_pipeline->updateCommandList(
data,
tensor->get_byte_size(),
isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx);

updateOriginalAddress = get_memory_id(_initStructs->getContext(), data);
}
}

Expand Down Expand Up @@ -517,8 +500,8 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::N
metadata.nodeFriendlyName.c_str());

auto& levelZeroTensors = isInput ? get_level_zero_input(ioIndex) : _levelZeroOutputTensors.at(ioIndex);
auto& tensorCreatedLocally =
isInput ? _inputLevelZeroTensorCreatedLocally.at(ioIndex) : _outputLevelZeroTensorCreatedLocally.at(ioIndex);
auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(ioIndex).tensorCreatedLocally
: _levelZeroOutputTensorInfo.at(ioIndex).tensorCreatedLocally;

levelZeroTensors = allocate_tensor(metadata,
ioIndex,
Expand Down Expand Up @@ -658,33 +641,20 @@ void ZeroInferRequest::infer_async() {
continue;
}

auto levelZeroRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor.at(SINGLE_TENSOR));

if (levelZeroRemoteTensor == nullptr) {
auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor.at(SINGLE_TENSOR)->data());

if (_originalMemoryIdInputLevelZeroTensor.at(inputIndex) != memoryId) {
_logger.debug("Update input graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor.at(SINGLE_TENSOR)->data(),
levelZeroTensor.at(SINGLE_TENSOR)->get_byte_size(),
_graph->get_input_descriptors().at(inputIndex).idx);

_originalMemoryIdInputLevelZeroTensor.at(inputIndex) = memoryId;
}
} else {
void* remoteLevelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);
if (std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor.at(SINGLE_TENSOR)) != nullptr) {
++inputIndex;
continue;
}

auto memoryId = get_memory_id(_initStructs->getContext(), remoteLevelZeroBuffer);
auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor.at(SINGLE_TENSOR)->data());

if (_originalMemoryIdInputLevelZeroTensor.at(inputIndex) != memoryId) {
_logger.debug("Update input graph descriptor with the new remote tensor");
_pipeline->updateCommandList(remoteLevelZeroBuffer,
levelZeroRemoteTensor->get_byte_size(),
_graph->get_input_descriptors().at(inputIndex).idx);
if (_levelZeroInputTensorInfo.at(inputIndex).originalMemoryId != memoryId) {
_logger.debug("Update input graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor.at(SINGLE_TENSOR)->data(),
levelZeroTensor.at(SINGLE_TENSOR)->get_byte_size(),
_graph->get_input_descriptors().at(inputIndex).idx);

_originalMemoryIdInputLevelZeroTensor.at(inputIndex) = memoryId;
}
_levelZeroInputTensorInfo.at(inputIndex).originalMemoryId = memoryId;
}

++inputIndex;
Expand All @@ -699,32 +669,20 @@ void ZeroInferRequest::infer_async() {
continue;
}

auto levelZeroRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor);
if (levelZeroRemoteTensor == nullptr) {
auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor->data());

if (_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) != memoryId) {
_logger.debug("Update output graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor->data(),
levelZeroTensor->get_byte_size(),
_graph->get_output_descriptors().at(outputIndex).idx);

_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) = memoryId;
}
} else {
void* remoteLevelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);
if (std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor) != nullptr) {
++outputIndex;
continue;
}

auto memoryId = get_memory_id(_initStructs->getContext(), remoteLevelZeroBuffer);
auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor->data());

if (_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) != memoryId) {
_logger.debug("Update output graph descriptor with the new remote tensor");
_pipeline->updateCommandList(remoteLevelZeroBuffer,
levelZeroRemoteTensor->get_byte_size(),
_graph->get_output_descriptors().at(outputIndex).idx);
if (_levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId != memoryId) {
_logger.debug("Update output graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor->data(),
levelZeroTensor->get_byte_size(),
_graph->get_output_descriptors().at(outputIndex).idx);

_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) = memoryId;
}
_levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId = memoryId;
}

++outputIndex;
Expand Down
30 changes: 0 additions & 30 deletions src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,36 +147,6 @@ void ZeroRemoteTensor::allocate(const size_t bytes) {
update_strides();
}

// Set a new shape on the remote tensor.
// NOTE: this override is removed by this commit; reshaping a remote tensor to
// a larger size is no longer supported (the base RemoteTensor::set_shape now
// rejects it). Kept here as the pre-change behavior for reference.
void ZeroRemoteTensor::set_shape(ov::Shape new_shape) {
    if (_shape == new_shape)
        return;

    _shape = std::move(new_shape);

    // Growing past the allocated capacity requires releasing and reallocating
    // the level-zero buffer, which is only possible on some configurations.
    if (ov::shape_size(_shape) > ov::shape_size(_capacity)) {
#ifdef __linux__
        // On Linux the enlarge path is unconditionally unsupported.
        OPENVINO_THROW("Re-shaping the tensor with a larger shape is not available.");
#endif

        // Reallocation relies on the driver's mutable command list support;
        // older drivers cannot patch the command list with the new buffer.
        if (!_init_structs->getMutableCommandListVersion()) {
            OPENVINO_THROW("Re-shaping the tensor with a larger shape is not available using this driver version. "
                           "Please update the driver.");
        }

        if (!deallocate()) {
            OPENVINO_THROW("Cannot deallocate tensor while an attempt to enlarge tensor area in set_shape.");
        }

        _capacity = _shape;

        const auto byte_size = ov::element::get_memory_size(_element_type, shape_size(_shape));
        allocate(byte_size);
    }

    // Strides depend on the shape; recompute them for the new layout.
    _strides.clear();
    update_strides();
}

bool ZeroRemoteTensor::is_allocated() const noexcept {
return _data != nullptr;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ class RemoteTensor : public ov::IRemoteTensor {
*/
const std::string& get_device_name() const override;

/**
* @brief Set new shape for tensor
* @note Allocation of a bigger tensor is not possible
* @param shape A new shape
*/
void set_shape(ov::Shape shape) override;

/**
* @return A tensor element type
*/
Expand Down
14 changes: 14 additions & 0 deletions src/plugins/intel_npu/src/common/src/remote_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@ const ov::AnyMap& RemoteTensor::get_properties() const {
return _properties;
}

/**
 * @brief Set new shape for the remote tensor.
 * @note Allocation of a bigger tensor is not possible; the new shape must fit
 *       within the already-allocated capacity.
 * @param new_shape A new shape
 * @throws ov::Exception if the new shape requires more elements than the
 *         current capacity holds.
 */
void RemoteTensor::set_shape(ov::Shape new_shape) {
    if (_shape == new_shape)
        return;

    // Validate BEFORE mutating any member: if the requested shape exceeds the
    // allocated capacity we must throw while the tensor is still in its
    // previous, consistent state (strong exception guarantee). Assigning
    // _shape first and throwing afterwards would leave the tensor advertising
    // a shape larger than its underlying buffer.
    if (ov::shape_size(new_shape) > ov::shape_size(_capacity)) {
        OPENVINO_THROW("Cannot set a new bigger shape to this tensor.");
    }

    _shape = std::move(new_shape);

    // Strides are derived from the shape; recompute them for the new layout.
    _strides.clear();
    update_strides();
}

void RemoteTensor::update_strides() {
if (_element_type.bitwidth() < 8) {
return;
Expand Down
Loading

0 comments on commit a14a25b

Please sign in to comment.