[NPUW] Serialization fixes (#28442)
Co-authored-by: Dmitry Matveev <[email protected]>
smirnov-alexey and dmatveev authored Jan 15, 2025
1 parent 724cf80 commit 40b19c8
Showing 6 changed files with 136 additions and 10 deletions.
29 changes: 28 additions & 1 deletion src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -133,9 +133,18 @@ std::shared_ptr<ov::npuw::ICompiledModel> ov::npuw::ICompiledModel::create(
auto use_llm_key = ov::intel_npu::npuw::llm::enabled.name();
if (properties.count(use_llm_key) && properties.at(use_llm_key).as<bool>() == true) {
LOG_INFO("ov::npuw::LLMCompiledModel will be created.");
compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, properties);
// Drop CACHE_DIR from the config.
// If it's present, we will be utilizing LLMCompiledModel's import
// rather than that of the underlying models and submodels
auto config = properties;
config.erase(ov::cache_dir.name());
compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, config);
} else {
LOG_INFO("ov::npuw::CompiledModel will be created.");
// CACHE_DIR isn't supported with NPU_USE_NPUW
if (properties.count(ov::cache_dir.name())) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with configuration: NPU_USE_NPUW : YES, NPUW_LLM : NO");
}
pre_load_transform(model, properties);
compiled_model = std::make_shared<ov::npuw::CompiledModel>(model, plugin, properties);
}
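
For context, a minimal usage sketch (not part of this commit) of how the two branches above play out for a caller; the string keys "NPU_USE_NPUW", "NPUW_LLM" and "CACHE_DIR", the model path and the cache directory are illustrative assumptions:

#include <openvino/openvino.hpp>

void compile_with_npuw_sketch() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // hypothetical model path

    // LLM path: CACHE_DIR is accepted here but erased before the submodels are
    // compiled, so caching relies on LLMCompiledModel's own import/export.
    auto llm_cm = core.compile_model(model, "NPU",
        ov::AnyMap{{"NPU_USE_NPUW", "YES"}, {"NPUW_LLM", "YES"}, {"CACHE_DIR", "npu_cache"}});

    // Non-LLM path: the same CACHE_DIR request now throws, per the check above.
    // auto cm = core.compile_model(model, "NPU",
    //     ov::AnyMap{{"NPU_USE_NPUW", "YES"}, {"CACHE_DIR", "npu_cache"}});  // OPENVINO_THROW
}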
@@ -611,6 +620,12 @@ void ov::npuw::CompiledModel::serialize(std::ostream& stream) const {

// Write config
write(stream, m_cfg);
// FIXME: utilize overload instead
write(stream, m_non_npuw_props.size());
for (const auto& p : m_non_npuw_props) {
write(stream, p.first);
write_any(stream, p.second);
}

// Serialize compiled submodels
write(stream, m_compiled_submodels.size());
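
One reading of the FIXME above: the loop could be folded into a write() overload for the property map (assumed here to be a std::map<std::string, ov::Any>, i.e. ov::AnyMap). A sketch of such an overload, not part of this commit:

// Hypothetical overload mirroring the loop above:
void write(std::ostream& stream, const std::map<std::string, ov::Any>& var) {
    write(stream, var.size());
    for (const auto& p : var) {
        write(stream, p.first);       // key
        write_any(stream, p.second);  // type-tagged value
    }
}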
@@ -671,6 +686,18 @@ std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(

// Deserialize config
read(stream, compiled->m_cfg);
compiled->m_cfg.parseEnvVars();
// FIXME: utilize overload instead
std::size_t props_size;
read(stream, props_size);
for (std::size_t i = 0; i < props_size; ++i) {
std::string key;
read(stream, key);
ov::Any val;
read_any(stream, val);
compiled->m_non_npuw_props[key] = val;
}
compiled->implement_properties();

// Deserialize compiled submodels
std::size_t subm_size = 0;
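
Symmetrically, the deserialization loop above could move behind a read() overload; a sketch under the same assumptions:

// Hypothetical counterpart to the write() overload sketched earlier:
void read(std::istream& stream, std::map<std::string, ov::Any>& var) {
    std::size_t size = 0;
    read(stream, size);
    for (std::size_t i = 0; i < size; ++i) {
        std::string key;
        read(stream, key);
        ov::Any val;
        read_any(stream, val);
        var[key] = std::move(val);
    }
}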
9 changes: 5 additions & 4 deletions src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -611,21 +611,21 @@ std::shared_ptr<ov::npuw::LLMCompiledModel> ov::npuw::LLMCompiledModel::deserialize(
if (vmajor != OPENVINO_VERSION_MAJOR || vminor != OPENVINO_VERSION_MINOR || vpatch != OPENVINO_VERSION_PATCH ||
s11n_version != std::string(NPUW_SERIALIZATION_VERSION)) {
OPENVINO_THROW("This blobs was serialized with different OV version!",
" Serialized by OV ",
"\nSerialized by OV ",
vmajor,
'.',
vminor,
'.',
vpatch,
" Current OV version ",
"\nCurrent OV version ",
OPENVINO_VERSION_MAJOR,
'.',
OPENVINO_VERSION_MINOR,
'.',
OPENVINO_VERSION_PATCH,
" NPUW serialized by version ",
"\nNPUW serialized by version ",
s11n_version,
" NPUW current serialization version ",
"\nNPUW current serialization version ",
NPUW_SERIALIZATION_VERSION);
}

@@ -653,6 +653,7 @@ std::shared_ptr<ov::npuw::LLMCompiledModel> ov::npuw::LLMCompiledModel::deserialize(

// Deserialize config
read(stream, compiled->m_cfg);
compiled->implement_properties();

// Deserialize CompiledModels
compiled->m_kvcache_compiled = ov::npuw::CompiledModel::deserialize(stream, plugin);
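
For context, a hedged sketch (not part of this commit) of how this deserialization is typically reached: importing a previously exported NPUW LLM blob and then querying a property, which is what the implement_properties() call added above enables on the restored object. The blob filename and the queried property are illustrative:

#include <fstream>
#include <openvino/openvino.hpp>

void import_npuw_llm_blob_sketch() {
    ov::Core core;
    std::ifstream blob("llm_model.blob", std::ios::binary);  // hypothetical blob exported earlier

    auto imported = core.import_model(blob, "NPU",
        ov::AnyMap{{"NPU_USE_NPUW", "YES"}, {"NPUW_LLM", "YES"}});

    // Property queries on the imported model depend on the config restored above.
    auto nireq = imported.get_property(ov::optimal_number_of_infer_requests);
    (void)nireq;
}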
92 changes: 92 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
@@ -23,6 +23,10 @@ void ov::npuw::s11n::write(std::ostream& stream, const bool& var) {
stream.write(reinterpret_cast<const char*>(&var), sizeof var);
}

void ov::npuw::s11n::write(std::ostream& stream, const float& var) {
stream.write(reinterpret_cast<const char*>(&var), sizeof var);
}

void ov::npuw::s11n::write(std::ostream& stream, const ov::npuw::compiled::Spatial& var) {
using ov::npuw::s11n::write;

@@ -74,6 +78,43 @@ void ov::npuw::s11n::write(std::ostream& stream, const ov::Output<const ov::Node>& var) {
write(stream, var.get_names());
}

enum class AnyType : int { STRING = 0, CHARS, INT, UINT32, INT64, UINT64, SIZET, FLOAT, BOOL };

void ov::npuw::s11n::write_any(std::ostream& stream, const ov::Any& var) {
// FIXME: figure out a proper way to serialize Any (for config)
if (var.is<std::string>()) {
write(stream, static_cast<int>(AnyType::STRING));
write(stream, var.as<std::string>());
} else if (var.is<const char*>()) {
// FIXME: handle properly
write(stream, static_cast<int>(AnyType::CHARS));
write(stream, std::string(var.as<const char*>()));
} else if (var.is<std::size_t>()) {
write(stream, static_cast<int>(AnyType::SIZET));
write(stream, var.as<std::size_t>());
} else if (var.is<int>()) {
write(stream, static_cast<int>(AnyType::INT));
write(stream, var.as<int>());
} else if (var.is<int64_t>()) {
write(stream, static_cast<int>(AnyType::INT64));
write(stream, var.as<int64_t>());
} else if (var.is<uint32_t>()) {
write(stream, static_cast<int>(AnyType::UINT32));
write(stream, var.as<uint32_t>());
} else if (var.is<uint64_t>()) {
write(stream, static_cast<int>(AnyType::UINT64));
write(stream, var.as<uint64_t>());
} else if (var.is<float>()) {
write(stream, static_cast<int>(AnyType::FLOAT));
write(stream, var.as<float>());
} else if (var.is<bool>()) {
write(stream, static_cast<int>(AnyType::BOOL));
write(stream, var.as<bool>());
} else {
NPUW_ASSERT(false && "Unsupported type");
}
}

void ov::npuw::s11n::read(std::istream& stream, std::streampos& var) {
stream.read(reinterpret_cast<char*>(&var), sizeof var);
}
@@ -89,6 +130,10 @@ void ov::npuw::s11n::read(std::istream& stream, bool& var) {
stream.read(reinterpret_cast<char*>(&var), sizeof var);
}

void ov::npuw::s11n::read(std::istream& stream, float& var) {
stream.read(reinterpret_cast<char*>(&var), sizeof var);
}

void ov::npuw::s11n::read(std::istream& stream, ov::npuw::compiled::Spatial& var) {
using ov::npuw::s11n::read;

@@ -169,3 +214,50 @@ void ov::npuw::s11n::read(std::istream& stream, std::shared_ptr<ov::Node>& var) {
var->output(0).set_tensor_ptr(tensor_dummy);
var->set_friendly_name(*names.begin()); // any_name ?
}

void ov::npuw::s11n::read_any(std::istream& stream, ov::Any& var) {
// FIXME: ugly, but cannot use .read(stream) here due to its usage of operator>>()
int type_int;
read(stream, type_int);
AnyType type = static_cast<AnyType>(type_int);
if (type == AnyType::STRING) {
std::string val;
read(stream, val);
var = std::move(val);
} else if (type == AnyType::CHARS) {
// FIXME: handle properly
std::string val;
read(stream, val);
var = std::move(val);
} else if (type == AnyType::SIZET) {
std::size_t val;
read(stream, val);
var = val;
} else if (type == AnyType::INT) {
int val;
read(stream, val);
var = val;
} else if (type == AnyType::INT64) {
int64_t val;
read(stream, val);
var = val;
} else if (type == AnyType::UINT32) {
uint32_t val;
read(stream, val);
var = val;
} else if (type == AnyType::UINT64) {
uint64_t val;
read(stream, val);
var = val;
} else if (type == AnyType::FLOAT) {
float val;
read(stream, val);
var = val;
} else if (type == AnyType::BOOL) {
bool val;
read(stream, val);
var = val;
} else {
NPUW_ASSERT(false && "Unsupported type");
}
}
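
A small round-trip sketch (an assumed usage, not part of this commit) showing how write_any and read_any pair up over an in-memory stream; because the AnyType tag is written first, a value stored as std::size_t is restored as std::size_t:

#include <cassert>
#include <sstream>
#include <openvino/core/any.hpp>
#include "serialization.hpp"  // ov::npuw::s11n::write_any / read_any (plugin-internal header)

void any_roundtrip_smoke_test() {
    std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);

    ov::Any in = std::size_t{42};
    ov::npuw::s11n::write_any(ss, in);   // writes the AnyType tag, then the payload

    ov::Any out;
    ov::npuw::s11n::read_any(ss, out);   // reads the tag back and restores the matching type
    assert(out.is<std::size_t>() && out.as<std::size_t>() == 42);
}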
5 changes: 5 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
@@ -27,6 +27,7 @@ class Config;
namespace ov {

// Forward declaration
class Any;
class Node;
class Tensor;
template <class>
@@ -52,19 +53,23 @@ namespace s11n {
void write(std::ostream& stream, const std::streampos& var);
void write(std::ostream& stream, const std::string& var);
void write(std::ostream& stream, const bool& var);
void write(std::ostream& stream, const float& var);
void write(std::ostream& stream, const ov::npuw::compiled::Spatial& var);
void write(std::ostream& stream, const ov::Tensor& var);
void write(std::ostream& stream, const ::intel_npu::Config& var);
void write(std::ostream& stream, const ov::Output<const ov::Node>& var);
void write_any(std::ostream& stream, const ov::Any& var);

void read(std::istream& stream, std::streampos& var);
void read(std::istream& stream, std::string& var);
void read(std::istream& stream, bool& var);
void read(std::istream& stream, float& var);
void read(std::istream& stream, ov::npuw::compiled::Spatial& var);
void read(std::istream& stream, ov::Tensor& var);
void read(std::istream& stream, ::intel_npu::Config& var);
void read(std::istream& stream, std::shared_ptr<ov::op::v0::Parameter>& var);
void read(std::istream& stream, std::shared_ptr<ov::Node>& var);
void read_any(std::istream& stream, ov::Any& var);

// Forward declaration
template <typename T1, typename T2>
7 changes: 6 additions & 1 deletion src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -219,7 +219,8 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str
auto iter_device = device_bank.storage.find(uid);

if (iter_device != device_bank.storage.end()) {
// Already allocated
// Shouldn't be possible
NPUW_ASSERT(false);
return;
}

@@ -234,6 +235,10 @@
ov::Tensor allocated_tensor;

// FIXME: reading not via a dedicated function
bool is_initialized = false;
read(stream, is_initialized);
NPUW_ASSERT(is_initialized);

std::string type_str;
read(stream, type_str);
ov::element::Type type(type_str);
4 changes: 0 additions & 4 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -622,10 +622,6 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
ov::AnyMap localProperties = properties;
if (localProperties.count(useNpuwKey)) {
if (localProperties.at(useNpuwKey).as<bool>() == true) {
// CACHE_DIR isn't supported with NPU_USE_NPUW
if (localProperties.count(ov::cache_dir.name()) || !_globalConfig.get<CACHE_DIR>().empty()) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!");
}
return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties);
} else {
// NPUW is disabled, remove the key from the properties
