Skip to content

Commit

Permalink
Refactor import_model new API to accept only either std::istream
Browse files Browse the repository at this point in the history
…or `ov::AlignedBuffer`
  • Loading branch information
MirceaDan99 committed Nov 26, 2024
1 parent 41b08f6 commit 8bd6735
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 53 deletions.
6 changes: 2 additions & 4 deletions src/inference/dev_api/openvino/runtime/iplugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
* @param properties A ov::AnyMap of properties
* @return A compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const;

/**
Expand All @@ -207,8 +206,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
* @param properties A ov::AnyMap of properties
* @return A compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const;

Expand Down
6 changes: 2 additions & 4 deletions src/inference/src/dev/iplugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,12 @@ const std::string& ov::IPlugin::get_device_name() const {
return m_plugin_name;
}

// Base-class fallback for importing a compiled model from an aligned buffer: the body does nothing
// except raise the "not implemented" error below.
// NOTE(review): the parameter list is listed twice here (stream + buffer, then buffer only). Going by the
// commit title, the buffer-only form is presumably the one that remains - confirm against the repository.
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const{
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const{
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
Expand Down
7 changes: 3 additions & 4 deletions src/inference/src/dev/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,11 @@ ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so});
}

// Forwards the buffer-based import to the wrapped plugin (m_ptr) inside OV_PLUGIN_CALL_STATEMENT and
// returns the resulting compiled model paired with m_so.
// NOTE(review): signature and forwarding call are each listed twice (with and without the std::istream
// argument); the stream-less pair presumably is the one that remains - confirm against the repository.
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so});
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, properties), m_so});
}

ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so});
Expand Down
5 changes: 2 additions & 3 deletions src/inference/src/dev/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,9 @@ class Plugin {
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const;
SoPtr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
SoPtr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

Expand Down
4 changes: 1 addition & 3 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
}


std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& config) const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");

Expand All @@ -571,7 +570,6 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
}

ModelDeserializer deserializer(
model_stream,
model_buffer,
[this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
return get_core()->read_model(model, weights);
Expand Down
6 changes: 2 additions & 4 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,9 @@ class Plugin : public ov::IPlugin {
"import_model with RemoteContext is not supported by CPU plugin!");
};

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override {
OPENVINO_THROW_NOT_IMPLEMENTED(
Expand Down
8 changes: 3 additions & 5 deletions src/plugins/intel_cpu/src/utils/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void ModelSerializer::operator<<(const std::shared_ptr<ov::Model>& model) {

////////// ModelDeserializer //////////

ModelDeserializer::ModelDeserializer(std::istream& model_stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
ModelDeserializer::ModelDeserializer(std::shared_ptr<ov::AlignedBuffer> model_buffer,
ModelBuilder fn,
const CacheDecrypt& decrypt_fn,
bool decript_from_string)
Expand All @@ -47,14 +46,13 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream,

// Deserializes into `model`: takes the process_mmap path when m_model_buffer is set and the
// process_stream path otherwise.
void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
if (m_model_buffer) {
// NOTE(review): two calls are listed. The second one matches the one-argument process_mmap signature
// shown further down, but it no longer receives `model` - confirm how the buffer path hands the
// deserialized model back to the caller.
process_mmap(model, m_model_buffer);
process_mmap(m_model_buffer);
} else {
process_stream(model);
}
}

void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
const std::shared_ptr<ov::AlignedBuffer>& mmemory) {
void ModelDeserializer::process_mmap(const std::shared_ptr<ov::AlignedBuffer>& mmemory) {
// Note: Don't use seekg with mmaped stream. This may affect the performance of some models.
// Get file size before seek content.
// Blob from cache may have other header, so need to skip this.
Expand Down
5 changes: 2 additions & 3 deletions src/plugins/intel_cpu/src/utils/serialize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ class ModelDeserializer {
public:
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&, const std::shared_ptr<ov::AlignedBuffer>&)> ModelBuilder;

ModelDeserializer(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
ModelDeserializer(std::shared_ptr<ov::AlignedBuffer> model_buffer,
ModelBuilder fn,
const CacheDecrypt& encrypt_fn,
bool decript_from_string);
Expand All @@ -44,7 +43,7 @@ class ModelDeserializer {
protected:
static void set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model);

void process_mmap(std::shared_ptr<ov::Model>& model, const std::shared_ptr<ov::AlignedBuffer>& memory);
void process_mmap(const std::shared_ptr<ov::AlignedBuffer>& memory);

void process_stream(std::shared_ptr<ov::Model>& model);

Expand Down
6 changes: 2 additions & 4 deletions src/plugins/intel_npu/src/plugin/include/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,14 @@ class Plugin : public ov::IPlugin {

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream, const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

Expand Down
77 changes: 58 additions & 19 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -744,10 +744,58 @@ ov::SoPtr<ov::IRemoteContext> Plugin::get_default_context(const ov::AnyMap&) con
}

// Imports a compiled model from `stream` when no remote context is supplied.
// Visible steps: merge the call-time properties with the global config, resolve platform and device,
// apply the batch configuration, read getFileSize(stream) bytes from the stream, let the compiler parse
// them into a graph and wrap that graph in a CompiledModel.
// Exceptions raised inside the try block are rethrown through OPENVINO_THROW.
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const {
// NOTE(review): this delegating return is listed directly ahead of the inlined body. It targets a
// (stream, buffer, properties) overload and, as written, would return before anything below runs.
// It presumably is the pre-commit body that the following lines replace - confirm against the repository.
return import_model(stream, nullptr, properties);
OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");

// Build the per-call configuration from _globalConfig and the supplied properties (run-time option mode).
const std::map<std::string, std::string> propertiesMap = any_copy(properties);
auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime);
_logger.setLevel(localConfig.get<LOG_LEVEL>());
// Write the platform reported by the backends back into the local config.
const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
localConfig.update({{ov::intel_npu::platform.name(), platform}});
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());

set_batch_config(_backends->isBatchingSupported(), localConfig);

// Only warn (do not fail) when the LOADED_FROM_CACHE option is not set.
const auto loadedFromCache = localConfig.get<LOADED_FROM_CACHE>();
if (!loadedFromCache) {
_logger.warning(
"The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!");
}

OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse");

std::shared_ptr<ov::ICompiledModel> compiledModel;

try {
auto compiler = getCompiler(localConfig);

// Copy graphSize bytes from the stream into a heap-allocated vector.
auto graphSize = getFileSize(stream);
auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

// blobSO is also passed as the SharedBuffer's third argument - presumably so the vector stays alive
// for as long as the buffer is referenced; confirm against ov::SharedBuffer.
auto graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
// The network name is "net" followed by the current value of the load counter, which is then incremented.
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

// Model object created from the graph's input/output metadata only.
const std::shared_ptr<ov::Model> modelDummy =
create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);

compiledModel = std::make_shared<CompiledModel>(modelDummy, shared_from_this(), device, graph, localConfig);
} catch (const std::exception& ex) {
// Re-raise with the original message appended.
OPENVINO_THROW("Can't import network: ", ex.what());
} catch (...) {
OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel");
}

OV_ITT_TASK_SKIP(PLUGIN_IMPORT_MODEL);

return compiledModel;
}

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");

Expand All @@ -773,19 +821,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, s
try {
auto compiler = getCompiler(localConfig);

std::shared_ptr<IGraph> graph;
if (model_buffer != nullptr) {
graph = compiler->parse(model_buffer, localConfig);
} else {
auto graphSize = getFileSize(stream);
auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);
graph = compiler->parse(std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO), localConfig);
}
auto graph = compiler->parse(model_buffer, localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand All @@ -806,20 +842,23 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, s
/**
 * @brief Imports a compiled model from a stream for a caller-supplied remote context.
 *
 * The context is only validated here: it must hold a RemoteContextImpl. The import itself is delegated
 * to the (stream, properties) overload, the same way the buffer-based overload with a context delegates
 * to its context-free counterpart.
 *
 * @param stream Stream that provides the compiled model
 * @param context Remote context; rejected via OPENVINO_THROW when it is not a RemoteContextImpl
 * @param properties A ov::AnyMap of properties forwarded to the import
 * @return A compiled model
 */
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
                                                         const ov::SoPtr<ov::IRemoteContext>& context,
                                                         const ov::AnyMap& properties) const {
    const auto casted = std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr);
    if (casted == nullptr) {
        OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
    }

    // Delegate to the context-free stream overload. Passing `context` along again would select this
    // very overload and recurse without end; the former (stream, nullptr, context, properties) target
    // is no longer declared.
    return import_model(stream, properties);
}

// Buffer-based import for a caller-supplied remote context: the context is checked to hold a
// RemoteContextImpl (otherwise OPENVINO_THROW is raised) and the import is then delegated to an
// overload that takes no context.
// NOTE(review): the signature head and the final return are each listed twice (with and without the
// std::istream argument); the stream-less variants presumably are the ones that remain - confirm against
// the repository.
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const {
// `casted` is used for the null check only.
auto casted = std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr);
if (casted == nullptr) {
OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
}

return import_model(stream, model_buffer, properties);
return import_model(model_buffer, properties);
}

ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>& model,
Expand Down

0 comments on commit 8bd6735

Please sign in to comment.