Skip to content

Commit

Permalink
[NPU] Add plugin metadata to compiled model (#27159)
Browse files Browse the repository at this point in the history
### Details:
A versioned blob would look like this:

![blob-layout](https://github.com/user-attachments/assets/4f7f9427-503c-4ee7-bd2c-d40fc2b4df3f)



The _Blob data size_ field is needed when importing the blob: it gives the
offset to jump to in order to reach the _Metadata_ section.

Rules of thumb when we need to set a new `Metadata` version:
- if we **add fields at the end** of current `Metadata`, then `Minor` is
incremented. This means we maintain backward compatibility with old
blobs.

Example:
`After OV Version we add a new field.`
- if we **remove fields** or **add fields anywhere but at the end**, then
`Major` is incremented. This means we break backward compatibility with
old blobs.

Examples:
`Between Minor and OV Version, we add a new field.`
OR
`We remove OV Version.`

Simplified workflow:
- at export: we append the versioning metadata at the end of the blob
- at import: we extract the stored `Metadata` and verify it against the
currently supported one. If the imported blob is incompatible, we reject
it and print its OV version (where possible).

### Tickets:
 - *E-135371*

---------

Signed-off-by: Alexandru Enache <[email protected]>
  • Loading branch information
alexandruenache1111 authored Jan 15, 2025
1 parent ca38e56 commit 6e489ae
Show file tree
Hide file tree
Showing 11 changed files with 565 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
const Config& config,
std::optional<std::vector<uint8_t>> blob);

virtual void export_blob(std::ostream& stream) const = 0;
virtual size_t export_blob(std::ostream& stream) const = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DriverGraph final : public IGraph {
const Config& config,
std::optional<std::vector<uint8_t>> blob);

void export_blob(std::ostream& stream) const override;
size_t export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class PluginGraph final : public IGraph {
std::vector<uint8_t> blob,
const Config& config);

void export_blob(std::ostream& stream) const override;
size_t export_blob(std::ostream& stream) const override;

std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void DriverGraph::export_blob(std::ostream& stream) const {
size_t DriverGraph::export_blob(std::ostream& stream) const {
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
size_t blobSize;
std::vector<uint8_t> blob;

if (_blobIsReleased) {
Expand All @@ -47,7 +47,7 @@ void DriverGraph::export_blob(std::ostream& stream) const {

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
return;
return 0;
}

if (_logger.level() >= ov::log::Level::INFO) {
Expand All @@ -61,6 +61,7 @@ void DriverGraph::export_blob(std::ostream& stream) const {
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
return blobSize;
}

std::vector<ov::ProfilingInfo> DriverGraph::process_profiling_output(const std::vector<uint8_t>& profData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
initialize(config);
}

void PluginGraph::export_blob(std::ostream& stream) const {
size_t PluginGraph::export_blob(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(_blob.data()), _blob.size());

if (!stream) {
_logger.error("Write blob to stream failed. Blob is broken!");
return;
return 0;
}

if (_logger.level() >= ov::log::Level::INFO) {
Expand All @@ -49,6 +49,7 @@ void PluginGraph::export_blob(std::ostream& stream) const {
_logger.info(str.str().c_str());
}
_logger.info("Write blob to stream successfully.");
return _blob.size();
}

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
Expand Down
173 changes: 173 additions & 0 deletions src/plugins/intel_npu/src/plugin/include/metadata.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cstdint>
#include <iosfwd>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>

namespace intel_npu {

/**
 * @brief Abstract interface shared by every versioned metadata layout.
 * @details Holds the metadata version and declares the stream (de)serialization and compatibility hooks that
 * concrete metadata versions must implement. Also provides helpers for packing and unpacking a
 * major/minor version pair into a single 32-bit value.
 */
struct MetadataBase {
protected:
    // Presumably a value packed by make_version() (major in the high 16 bits, minor in the low 16 bits);
    // confirm against the derived-class constructors.
    uint32_t _version;

public:
    /**
     * @brief Stores the given metadata version.
     * @param version Version value kept in `_version`.
     */
    explicit MetadataBase(uint32_t version) : _version(version) {}

    /**
     * @brief Reads metadata from a stream.
     */
    virtual void read(std::istream& stream) = 0;

    /**
     * @brief Writes metadata to a stream.
     */
    virtual void write(std::ostream& stream) = 0;

    /**
     * @brief Tells whether this metadata is considered compatible; the exact criteria are defined by the
     * implementing metadata version.
     */
    virtual bool is_compatible() = 0;

    /**
     * @brief Returns the blob size tracked by the implementing metadata version.
     */
    virtual uint64_t get_blob_size() const = 0;

    virtual ~MetadataBase() = default;

    /**
     * @brief Returns a uint32_t value which represents two uint16_t values concatenated.
     * @details Convention for bumping the metadata version:
     *    - Increment Major in case of: removing a current field OR adding a new field in between fields.
     *    - Increment Minor in case of: adding a new field at the end.
     *
     * @param major Value placed in the upper 16 bits.
     * @param minor Value placed in the lower 16 bits.
     * @return Major and minor versions concatenated into a single uint32_t value.
     */
    static constexpr uint32_t make_version(uint16_t major, uint16_t minor) {
        // Widen to uint32_t BEFORE shifting: a bare `major << 16` is evaluated on a promoted (signed) int,
        // so majors >= 0x8000 would shift into the sign bit and rely on a signed-to-unsigned conversion.
        return (static_cast<uint32_t>(major) << 16) | static_cast<uint32_t>(minor);
    }

    /**
     * @brief Gets the major version.
     * @param version Packed version, as produced by make_version().
     * @return Major version (upper 16 bits of `version`).
     */
    static constexpr uint16_t get_major(uint32_t version) {
        return static_cast<uint16_t>(version >> 16);
    }

    /**
     * @brief Gets the minor version.
     * @param version Packed version, as produced by make_version().
     * @return Minor version (lower 16 bits of `version`).
     */
    static constexpr uint16_t get_minor(uint32_t version) {
        return static_cast<uint16_t>(version);
    }
};

/**
 * @brief Magic bytes used for identifying NPU blobs.
 * @note Declared `inline` so that all translation units including this header share a single definition
 * instead of each getting its own internal-linkage copy.
 */
inline constexpr std::string_view MAGIC_BYTES{"OVNPU"};

/**
 * @brief List of supported version formats.
 * @note These constants are `inline` so that every translation unit including this header refers to the
 * same entity rather than a per-file internal-linkage copy.
 */
inline constexpr uint32_t METADATA_VERSION_1_0{MetadataBase::make_version(1, 0)};

/**
 * @brief Current metadata version.
 */
inline constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_1_0};

/**
 * @brief Major and minor components extracted from the current metadata version.
 */
inline constexpr uint16_t CURRENT_METADATA_MAJOR_VERSION{MetadataBase::get_major(CURRENT_METADATA_VERSION)};
inline constexpr uint16_t CURRENT_METADATA_MINOR_VERSION{MetadataBase::get_minor(CURRENT_METADATA_VERSION)};

/**
 * @brief Holds an OpenVINO version string that can be read from and written to a stream.
 */
struct OpenvinoVersion {
public:
    /**
     * @brief Default constructor (defined out of line).
     */
    OpenvinoVersion();

    /**
     * @brief Constructs the object from a version string.
     * @param version Version text to hold.
     */
    OpenvinoVersion(std::string_view version);

    /**
     * @brief Reads version data from a stream.
     * @param stream Input stream to read from.
     */
    void read(std::istream& stream);

    /**
     * @brief Writes version data to a stream.
     * @param stream Output stream to write to.
     */
    void write(std::ostream& stream);

    /**
     * @brief Gets the version string.
     * @return A copy of the stored version string.
     */
    std::string get_version() const;

private:
    std::string _version;
    // Presumably the length of `_version` as stored in the blob; confirm against the implementation.
    uint32_t _size;
};

/**
 * @brief Primary class template for metadata handling, selected by the metadata version value.
 * @details The generic form adds nothing on top of MetadataBase; concrete layouts are supplied through
 * explicit specializations for each supported version.
 */
template <uint32_t version>
struct Metadata : MetadataBase {};

/**
 * @brief Metadata layout for version 1.0 (explicit specialization of the Metadata template).
 * @details Carries an OpenVINO version and the size of the blob data.
 */
template <>
struct Metadata<METADATA_VERSION_1_0> : MetadataBase {
public:
    /**
     * @brief Builds version 1.0 metadata.
     * @param blobSize Size of the blob data.
     * @param ovVersion Optional OpenVINO version string; defaults to `std::nullopt`.
     */
    Metadata(uint64_t blobSize, std::optional<std::string_view> ovVersion = std::nullopt);

    /**
     * @brief Reads this metadata from a stream.
     */
    void read(std::istream& stream) override;

    /**
     * @brief Writes this metadata to a stream.
     *
     * @attention Every metadata specialization must write the metadata version first.
     *
     * @details On import of a versioned blob the metadata version field is best read first: it is the
     * quickest way to reject many incompatible blobs without traversing the whole NPU metadata section.
     */
    void write(std::ostream& stream) override;

    /**
     * @brief Checks if metadata is supported.
     *
     * @return
     *    - false: if the blob metadata does not match the current metadata, or if the blob OpenVINO
     *      version does not match the current one.
     *    - true: if all versions match.
     *
     * @note The version check can be disabled if the "NPU_DISABLE_VERSION_CHECK" environment variable is set to '1'.
     */
    bool is_compatible() override;

    /**
     * @brief Returns the blob size held by this metadata.
     */
    uint64_t get_blob_size() const override;

protected:
    OpenvinoVersion _ovVersion;
    uint64_t _blobDataSize;
};

/**
 * @brief Creates a Metadata object.
 *
 * @param version Metadata version selecting which Metadata layout to create (presumably a packed
 * major/minor value as built by MetadataBase::make_version; confirm against the definition).
 * @param blobSize Blob size handed to the created metadata object.
 * @return Unique pointer to the created MetadataBase object if the major version is supported; otherwise, returns
 * 'nullptr'. Callers must check the result before dereferencing it.
 */
std::unique_ptr<MetadataBase> create_metadata(uint32_t version, uint64_t blobSize);

/**
 * @brief Reads metadata from a blob.
 *
 * @param stream Input stream holding the blob to inspect.
 * @return If the blob is versioned and its major version is supported, returns a unique pointer to the read
 * MetadataBase object; otherwise, returns 'nullptr'. Callers must check the result before dereferencing it.
 */
std::unique_ptr<MetadataBase> read_metadata_from(std::istream& stream);

} // namespace intel_npu
6 changes: 5 additions & 1 deletion src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "intel_npu/config/compiler.hpp"
#include "intel_npu/config/config.hpp"
#include "intel_npu/config/runtime.hpp"
#include "metadata.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/properties.hpp"
Expand Down Expand Up @@ -72,7 +73,10 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(

void CompiledModel::export_model(std::ostream& stream) const {
_logger.debug("CompiledModel::export_model");
_graph->export_blob(stream);
size_t blobSizeBeforeVersioning = _graph->export_blob(stream);

auto meta = Metadata<CURRENT_METADATA_VERSION>(blobSizeBeforeVersioning, ov::get_openvino_version().buildNumber);
meta.write(stream);
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
Expand Down
Loading

0 comments on commit 6e489ae

Please sign in to comment.