Update command list if someone else allocated another tensor
Signed-off-by: Bogdan Pereanu <[email protected]>
pereanub committed Dec 16, 2024
1 parent 42de1ad commit 5cebf60
Showing 9 changed files with 508 additions and 18 deletions.
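The mechanism behind this change: Level Zero assigns each allocation a per-allocation identifier, reported by zeMemGetAllocProperties in ze_memory_allocation_properties_t::id. Caching that id and re-querying it later tells the plugin whether the buffer behind a tensor was reallocated, even if the new allocation landed at the same virtual address. A minimal standalone sketch of the pattern, using hypothetical wrapper names rather than the plugin code:

#include <cstdint>
#include <ze_api.h>

// Return the Level Zero allocation id for `ptr`, or 0 if the pointer is not a
// known allocation in this context (mirrors the get_memory_id helper added below).
uint64_t query_memory_id(ze_context_handle_t context, const void* ptr) {
    ze_memory_allocation_properties_t props = {};
    props.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
    if (zeMemGetAllocProperties(context, ptr, &props, nullptr) != ZE_RESULT_SUCCESS) {
        return 0;
    }
    return props.id;
}

// Hypothetical caller: re-patch a command-list argument only when the
// underlying allocation has changed since it was last recorded.
void rebind_if_reallocated(ze_context_handle_t context, const void* buffer, uint64_t& cachedId) {
    const uint64_t currentId = query_memory_id(context, buffer);
    if (currentId != cachedId) {
        // ...update the mutable command list with the new buffer address here...
        cachedId = currentId;
    }
}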
@@ -75,6 +75,9 @@ class ZeroInferRequest final : public SyncInferRequest {
mutable std::vector<std::optional<bool>> _inputLevelZeroTensorCreatedLocally;
mutable std::vector<std::optional<bool>> _outputLevelZeroTensorCreatedLocally;

mutable std::vector<uint64_t> _originalMemoryIdInputLevelZeroTensor;
mutable std::vector<uint64_t> _originalMemoryIdOutputLevelZeroTensor;

ze_device_properties_t _properties = {};
std::shared_ptr<const zeroMemory::HostMemAllocator> _inputAllocator;
std::shared_ptr<const zeroMemory::HostMemAllocator> _outputAllocator;
6 changes: 0 additions & 6 deletions src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -14,12 +14,6 @@

namespace intel_npu {

struct TensorData {
void* mem;
size_t size;
bool levelZeroTensorCreatedLocally = true;
};

struct Pipeline {
public:
Pipeline(const Config& config,
174 changes: 170 additions & 4 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -92,6 +92,17 @@ bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, c
return false;
}

uint64_t get_memory_id(ze_context_handle_t hContext, const void* ptr) {
ze_memory_allocation_properties_t desc = {};
desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
auto res = intel_npu::zeMemGetAllocProperties(hContext, ptr, &desc, nullptr);
if (res != ZE_RESULT_SUCCESS) {
return 0;
}

return desc.id;
}

} // namespace

//------------------------------------------------------------------------------
@@ -107,6 +118,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
_levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
_inputLevelZeroTensorCreatedLocally(_metadata.inputs.size(), std::nullopt),
_outputLevelZeroTensorCreatedLocally(_metadata.outputs.size(), std::nullopt),
_originalMemoryIdInputLevelZeroTensor(_metadata.inputs.size(), 0),
_originalMemoryIdOutputLevelZeroTensor(_metadata.outputs.size(), 0),
_profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
_profilingQuery(_initStructs, 0) {
_logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
@@ -184,6 +197,7 @@ void ZeroInferRequest::create_pipeline() {
INPUT,
*_inputAllocator,
_graph->get_batch_size());
_inputLevelZeroTensorCreatedLocally.at(inputIndex) = true;
}

for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
@@ -198,6 +212,50 @@ void ZeroInferRequest::create_pipeline() {
OUTPUT,
*_outputAllocator,
_graph->get_batch_size());
_outputLevelZeroTensorCreatedLocally.at(outputIndex) = true;
}

if (_initStructs->getMutableCommandListVersion()) {
for (size_t inputIndex = 0; inputIndex < _metadata.inputs.size(); ++inputIndex) {
if (is_batched_input(inputIndex)) {
continue;
}

const IODescriptor inputDescriptor = _metadata.inputs.at(inputIndex);
if (inputDescriptor.isShapeTensor || inputDescriptor.isStateInput) {
continue;
}

auto levelZeroRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(get_level_zero_input(inputIndex));
if (levelZeroRemoteTensor == nullptr) {
_originalMemoryIdInputLevelZeroTensor.at(inputIndex) =
get_memory_id(_initStructs->getContext(), get_level_zero_input(inputIndex)->data());
} else {
void* levelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);
_originalMemoryIdInputLevelZeroTensor.at(inputIndex) =
get_memory_id(_initStructs->getContext(), levelZeroBuffer);
}
}

for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
const IODescriptor outputDescriptor = _metadata.outputs.at(outputIndex);
if (outputDescriptor.isShapeTensor || outputDescriptor.isStateOutput) {
continue;
}

auto levelZeroRemoteTensor =
std::dynamic_pointer_cast<ZeroRemoteTensor>(_levelZeroOutputTensors.at(outputIndex));
if (levelZeroRemoteTensor == nullptr) {
_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) =
get_memory_id(_initStructs->getContext(), _levelZeroOutputTensors.at(outputIndex)->data());
} else {
void* levelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);
_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) =
get_memory_id(_initStructs->getContext(), levelZeroBuffer);
}
}
}

// Find the corresponding command queue group.
@@ -226,7 +284,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
const bool isInput) {
OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data");
auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
auto& tensorsData =
auto& tensorCreatedLocally =
isInput ? _inputLevelZeroTensorCreatedLocally.at(index) : _outputLevelZeroTensorCreatedLocally.at(index);

bool setTensorData = false;
@@ -243,7 +301,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
if (!setTensorData) {
// make sure that the L0 tensor was allocated locally and is not received from the user when receiving
// random tensor
if (tensorsData.has_value() && !tensorsData) {
if (tensorCreatedLocally.has_value() && !(*tensorCreatedLocally)) {
_logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor");
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor");

@@ -259,16 +317,21 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
}

if (setTensorData) {
tensorsData = levelZeroTensorCreatedLocally;
tensorCreatedLocally = levelZeroTensorCreatedLocally;

if (_pipelineIsCreated) {
_logger.debug("ZeroInferRequest::infer_async - update command list");

auto& updateOriginalAddress = isInput ? _originalMemoryIdInputLevelZeroTensor.at(index)
: _originalMemoryIdOutputLevelZeroTensor.at(index);

OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList");
_pipeline->updateCommandList(levelZeroTensors->data(),
levelZeroTensors->get_byte_size(),
isInput ? _graph->get_input_descriptors().at(index).idx
: _graph->get_output_descriptors().at(index).idx);

updateOriginalAddress = get_memory_id(_initStructs->getContext(), levelZeroTensors->data());
}
}
}
Expand All @@ -290,16 +353,25 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTe
}

auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
auto& tensorCreatedLocally =
isInput ? _inputLevelZeroTensorCreatedLocally.at(index) : _outputLevelZeroTensorCreatedLocally.at(index);

levelZeroTensors = tensor;
tensorCreatedLocally = false;

if (_pipelineIsCreated) {
_logger.debug("ZeroInferRequest::infer_async - update command list");

auto& updateOriginalAddress = isInput ? _originalMemoryIdInputLevelZeroTensor.at(index)
: _originalMemoryIdOutputLevelZeroTensor.at(index);

OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList");
_pipeline->updateCommandList(
data,
tensor->get_byte_size(),
isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx);

updateOriginalAddress = get_memory_id(_initStructs->getContext(), data);
}
}

@@ -332,6 +404,7 @@ void ZeroInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
} else {
if (_userOutputTensors.at(foundPort.idx)._ptr == tensor._ptr) {
// Got set_tensor with the same object here too - do nothing
_logger.debug("ZeroInferRequest::set_tensor - got the same tensor, do nothing");
return;
}
_userOutputTensors.at(foundPort.idx) = tensor;
@@ -408,6 +481,7 @@ void ZeroInferRequest::set_tensors(const ov::Output<const ov::Node>& port,

if (_pipelineIsCreated) {
OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList");

_pipeline->updateCommandList(data, _graph->get_input_descriptors().at(foundPort.idx).idx, i);
}
}
@@ -438,13 +512,17 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::N
_logger.debug("ZeroInferRequest::get_tensor - tensor is not allocated, create the tensor");

auto& levelZeroTensors = isInput ? get_level_zero_input(ioIndex) : _levelZeroOutputTensors.at(ioIndex);
auto& tensorCreatedLocally =
isInput ? _inputLevelZeroTensorCreatedLocally.at(ioIndex) : _outputLevelZeroTensorCreatedLocally.at(ioIndex);

levelZeroTensors = allocate_tensor(isInput ? _metadata.inputs.at(ioIndex) : _metadata.outputs.at(ioIndex),
ioIndex,
isInput,
isInput ? *_inputAllocator : *_outputAllocator,
_graph->get_batch_size());

tensorCreatedLocally = true;

return levelZeroTensors;
}

@@ -541,7 +619,7 @@ void ZeroInferRequest::infer_async() {
? userTensor.at(SINGLE_TENSOR)->data()
: extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle);

const std::shared_ptr<ov::ITensor>& levelZeroTensor = get_level_zero_input(inputIndex);
const auto& levelZeroTensor = get_level_zero_input(inputIndex);
auto levelZeroRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor);
if (levelZeroRemoteTensor == nullptr) {
void* levelZeroBuffer = levelZeroTensor->data();
Expand All @@ -560,6 +638,94 @@ void ZeroInferRequest::infer_async() {
++inputIndex;
}

if (_initStructs->getMutableCommandListVersion()) {
inputIndex = 0;

for (const auto& levelZeroTensor : _levelZeroInputTensors) {
if (is_batched_input(inputIndex)) {
++inputIndex;
continue;
}

const auto inputDescriptor = _metadata.inputs.at(inputIndex);
if (inputDescriptor.isShapeTensor || inputDescriptor.isStateInput) {
++inputIndex;
continue;
}

auto levelZeroRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor.at(SINGLE_TENSOR));

if (levelZeroRemoteTensor == nullptr) {
auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor.at(SINGLE_TENSOR)->data());

if (_originalMemoryIdInputLevelZeroTensor.at(inputIndex) != memoryId) {
_logger.debug("Update input graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor.at(SINGLE_TENSOR)->data(),
levelZeroTensor.at(SINGLE_TENSOR)->get_byte_size(),
_graph->get_input_descriptors().at(inputIndex).idx);

_originalMemoryIdInputLevelZeroTensor.at(inputIndex) = memoryId;
}
} else {
void* remoteLevelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);

auto memoryId = get_memory_id(_initStructs->getContext(), remoteLevelZeroBuffer);

if (_originalMemoryIdInputLevelZeroTensor.at(inputIndex) != memoryId) {
_logger.debug("Update input graph descriptor with the new remote tensor");
_pipeline->updateCommandList(remoteLevelZeroBuffer,
levelZeroRemoteTensor->get_byte_size(),
_graph->get_input_descriptors().at(inputIndex).idx);

_originalMemoryIdInputLevelZeroTensor.at(inputIndex) = memoryId;
}
}

++inputIndex;
}

size_t outputIndex = 0;

for (const auto& levelZeroTensor : _levelZeroOutputTensors) {
const auto outputDescriptor = _metadata.outputs.at(outputIndex);
if (outputDescriptor.isShapeTensor || outputDescriptor.isStateOutput) {
++outputIndex;
continue;
}

auto levelZeroRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor);
if (levelZeroRemoteTensor == nullptr) {
auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor->data());

if (_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) != memoryId) {
_logger.debug("Update output graph descriptor with the new tensor");
_pipeline->updateCommandList(levelZeroTensor->data(),
levelZeroTensor->get_byte_size(),
_graph->get_output_descriptors().at(outputIndex).idx);

_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) = memoryId;
}
} else {
void* remoteLevelZeroBuffer =
extract_object(levelZeroRemoteTensor->get_properties(), ov::intel_npu::mem_handle);

auto memoryId = get_memory_id(_initStructs->getContext(), remoteLevelZeroBuffer);

if (_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) != memoryId) {
_logger.debug("Update output graph descriptor with the new remote tensor");
_pipeline->updateCommandList(remoteLevelZeroBuffer,
levelZeroRemoteTensor->get_byte_size(),
_graph->get_output_descriptors().at(outputIndex).idx);

_originalMemoryIdOutputLevelZeroTensor.at(outputIndex) = memoryId;
}
}

++outputIndex;
}
}

OV_ITT_TASK_NEXT(ZERO_INFER, "push");
_pipeline->push();
}
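The infer_async() additions above reduce to one pattern per I/O slot: on every inference, re-query the allocation id of the Level Zero buffer (or of the remote tensor's mem_handle) and patch the command list only when it differs from the cached id. A simplified sketch with hypothetical names — getId standing in for the get_memory_id helper and rebind for Pipeline::updateCommandList:

#include <cstdint>
#include <functional>
#include <vector>

// buffers[i] is the current Level Zero buffer for argument i; cachedIds[i] is
// the id recorded when the command list was last patched for that argument.
void refresh_arguments(const std::vector<void*>& buffers,
                       std::vector<uint64_t>& cachedIds,
                       const std::function<uint64_t(const void*)>& getId,
                       const std::function<void(size_t, void*)>& rebind) {
    for (size_t i = 0; i < buffers.size(); ++i) {
        const uint64_t id = getId(buffers[i]);
        if (id != cachedIds[i]) {    // someone else allocated another tensor
            rebind(i, buffers[i]);   // patch the mutable command-list argument
            cachedIds[i] = id;
        }
    }
}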
49 changes: 46 additions & 3 deletions src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
@@ -13,6 +13,22 @@
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_api.hpp"
#include "intel_npu/utils/zero/zero_types.hpp"
#include "zero_remote_tensor.hpp"

namespace {

template <typename Type>
Type extract_object(const ov::AnyMap& params, const ov::Property<Type>& p) {
auto itrHandle = params.find(p.name());
ov::Any res = nullptr;
if (itrHandle == params.end()) {
OPENVINO_THROW("No parameter ", p.name(), " found in parameters map");
}
res = itrHandle->second;
return res.as<Type>();
}

} // namespace

namespace intel_npu {

@@ -59,25 +75,52 @@ Pipeline::Pipeline(const Config& config,
size_t ioIndex = 0;
for (const auto& desc : graph->get_input_descriptors()) {
if (inputTensorsData.at(ioIndex).size() > 1) {
graph->set_argument_value(desc.idx, inputTensorsData.at(ioIndex).at(i)->data());
void* data = nullptr;
auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(inputTensorsData.at(ioIndex).at(i));
if (remoteTensor == nullptr) {
data = inputTensorsData.at(ioIndex).at(i)->data();
} else {
data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
}

graph->set_argument_value(desc.idx, data);

++ioIndex;
continue;
}

void* data = nullptr;
auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(inputTensorsData.at(ioIndex).at(0));
if (remoteTensor == nullptr) {
data = inputTensorsData.at(ioIndex).at(0)->data();
} else {
data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
}

graph->set_argument_value(
desc.idx,
static_cast<unsigned char*>(inputTensorsData.at(ioIndex).at(0)->data()) +
static_cast<unsigned char*>(data) +
(i * inputTensorsData.at(ioIndex).at(0)->get_byte_size()) / _number_of_command_lists);

++ioIndex;
}

ioIndex = 0;
for (const auto& desc : graph->get_output_descriptors()) {
void* data = nullptr;
auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(outputTensorsData.at(ioIndex));
if (remoteTensor == nullptr) {
data = outputTensorsData.at(ioIndex)->data();
} else {
data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
}

graph->set_argument_value(
desc.idx,
static_cast<unsigned char*>(outputTensorsData.at(ioIndex)->data()) +
static_cast<unsigned char*>(data) +
(i * outputTensorsData.at(ioIndex)->get_byte_size()) / _number_of_command_lists);
++ioIndex;
}
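The Pipeline changes keep the existing batching arithmetic while adding the remote-tensor lookup: when one buffer backs all command lists, list i binds an equal slice of it at offset i * byte_size / number_of_command_lists. A minimal sketch of that offset computation, with illustrative names only:

#include <cstddef>

// Compute the argument pointer handed to command list `listIndex` when a single
// buffer of `byteSize` bytes is split evenly across `numLists` lists, as in the
// Pipeline constructor above.
void* slice_for_list(void* base, size_t byteSize, size_t numLists, size_t listIndex) {
    unsigned char* bytes = static_cast<unsigned char*>(base);
    const size_t sliceBytes = byteSize / numLists;  // even split per command list
    return bytes + listIndex * sliceBytes;
}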