Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[intel-npu] Adding NPU_DYNAMIC_QUANTIZATION property #28316

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ offer a limited set of supported OpenVINO features.
ov::enable_profiling
ov::workload_type
ov::intel_npu::compilation_mode_params
ov::intel_npu::compiler_dynamic_quantization
ov::intel_npu::turbo
ov::intel_npu::tiles
ov::intel_npu::max_tiles
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,4 +336,5 @@ void regmodule_properties(py::module m) {
wrap_property_RW(m_intel_npu, ov::intel_npu::max_tiles, "max_tiles");
wrap_property_RW(m_intel_npu, ov::intel_npu::bypass_umd_caching, "bypass_umd_caching");
wrap_property_RW(m_intel_npu, ov::intel_npu::defer_weights_load, "defer_weights_load");
wrap_property_RW(m_intel_npu, ov::intel_npu::compiler_dynamic_quantization, "compiler_dynamic_quantization");
}
5 changes: 5 additions & 0 deletions src/bindings/python/tests/test_runtime/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,11 @@ def test_properties_ro(ov_property_ro, expected_value):
"NPU_DEFER_WEIGHTS_LOAD",
((True, True),),
),
(
intel_npu.compiler_dynamic_quantization,
"NPU_COMPILER_DYNAMIC_QUANTIZATION",
((True, True),),
),
],
)
def test_properties_rw(ov_property_rw, expected_value, test_values):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ static constexpr ov::Property<uint32_t, ov::PropertyMutability::RO> compiler_ver
*/
static constexpr ov::Property<std::string> compilation_mode_params{"NPU_COMPILATION_MODE_PARAMS"};

/**
* @brief [Only for NPU compiler]
* Type: boolean
* Set or verify state of dynamic quantization in the NPU compiler.
* The plugin exposes this key as read-write; on a compiled model it is read-only.
* Compiler versions older than 7.1 do not support it: the plugin marks the property as unsupported and the
* driver compiler adapter strips it from the serialized configuration for those versions.
* @ingroup ov_runtime_npu_prop_cpp_api
*/
static constexpr ov::Property<bool> compiler_dynamic_quantization{"NPU_COMPILER_DYNAMIC_QUANTIZATION"};
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved

/**
* @brief [Only for NPU plugin]
* Type: bool
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_npu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ The following properties are supported:
| `ov::intel_npu::driver_version`/</br>`NPU_DRIVER_VERSION` | RO | NPU driver version. | `N/A` | `N/A` |
| `ov::intel_npu::compiler_version`/</br>`NPU_COMPILER_VERSION` | RO | NPU compiler version. MSB 16 bits are Major version, LSB 16 bits are Minor version | `N/A` | `N/A` |
| `ov::intel_npu::compilation_mode_params`/</br>`NPU_COMPILATION_MODE_PARAMS` | RW | Set various parameters supported by the NPU compiler. (See below) | `<std::string>`| `N/A` |
| `ov::intel_npu::compiler_dynamic_quantization`/</br>`NPU_COMPILER_DYNAMIC_QUANTIZATION` | RW | Enable/Disable dynamic quantization by NPU compiler | `YES` / `NO` | `N/A` |
| `ov::intel_npu::turbo`/</br>`NPU_TURBO` | RW | Set Turbo mode on/off | `YES`/ `NO`| `NO` |
| `ov::intel_npu::tiles`/</br>`NPU_TILES` | RW | Sets the number of npu tiles to compile the model for | `[0-]` | `-1` |
| `ov::intel_npu::max_tiles`/</br>`NPU_MAX_TILES` | RW | Maximum number of tiles supported by the device we compile for. Can be set for offline compilation. If not set, it will be populated by driver.| `[0-]` | `[1-6] depends on npu platform` |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,4 +357,26 @@ struct COMPILATION_NUM_THREADS final : OptionBase<COMPILATION_NUM_THREADS, int32
}
};

//
// NPU_COMPILER_DYNAMIC_QUANTIZATION
//

/// @brief Option descriptor for the boolean NPU_COMPILER_DYNAMIC_QUANTIZATION configuration key.
/// Supplies the static traits (key, default, mode, visibility) consumed through the OptionBase CRTP base.
struct COMPILER_DYNAMIC_QUANTIZATION final : OptionBase<COMPILER_DYNAMIC_QUANTIZATION, bool> {
/// @return The configuration key, taken from the public property so both always share one spelling.
static std::string_view key() {
return ov::intel_npu::compiler_dynamic_quantization.name();
}

/// @return Value used when the option is not set explicitly: dynamic quantization is off.
static bool defaultValue() {
return false;
}

/// @return OptionMode::CompileTime - the option is classified as a compile-time setting.
static OptionMode mode() {
return OptionMode::CompileTime;
}

/// @return true - the option is flagged as public.
/// NOTE(review): how OptionBase/Config treat public vs. private options is not visible here - confirm.
static bool isPublic() {
return true;
}
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

namespace intel_npu {

#ifndef ICOMPILER_MAKE_VERSION
/// @brief Generates npu compiler (generic 'oneAPI') API version number
/// @details Packs the major version into the upper 16 bits and the minor version into the lower 16 bits.
/// Every macro parameter is parenthesized so that arguments which are themselves expressions
/// (e.g. `a | b` or `cond ? x : y`) are evaluated as a unit before being shifted/masked.
/// @param _major Major version number (placed in bits 16 and above)
/// @param _minor Minor version number (only the low 16 bits are kept)
#    define ICOMPILER_MAKE_VERSION(_major, _minor) (((_major) << 16) | ((_minor) & 0x0000ffff))
#endif  // ICOMPILER_MAKE_VERSION

/**
* @struct NetworkDescription
* @brief The object returned by the compiler
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/al/src/config/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ void intel_npu::registerCompilerOptions(OptionsDesc& desc) {
desc.add<DMA_ENGINES>();
desc.add<DYNAMIC_SHAPE_TO_STATIC>();
desc.add<EXECUTION_MODE_HINT>();
desc.add<COMPILER_DYNAMIC_QUANTIZATION>();
lmielick marked this conversation as resolved.
Show resolved Hide resolved
}

//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -426,10 +426,10 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
optLevelStr << keyOfOptL << KEY_VALUE_SEPARATOR << "\\d+";
std::ostringstream perfHintStr;
perfHintStr << keyOfPerfHO << KEY_VALUE_SEPARATOR << "\\S+";
logger.warning("%s property is not suppored by this compiler version. Removing from parameters",
logger.warning("%s property is not supported by this compiler version. Removing from parameters",
keyOfOptL.c_str());
valueOfParams = std::regex_replace(valueOfParams, std::regex(optLevelStr.str()), "");
logger.warning("%s property is not suppored by this compiler version. Removing from parameters",
logger.warning("%s property is not supported by this compiler version. Removing from parameters",
keyOfPerfHO.c_str());
valueOfParams = std::regex_replace(valueOfParams, std::regex(perfHintStr.str()), "");

Expand Down Expand Up @@ -487,7 +487,7 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
pinningstr << ov::hint::enable_cpu_pinning.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+"
<< VALUE_DELIMITER;
logger.warning(
"ENABLE_CPU_PINNING property is not suppored by this compiler version. Removing from parameters");
"ENABLE_CPU_PINNING property is not supported by this compiler version. Removing from parameters");
content = std::regex_replace(content, std::regex(pinningstr.str()), "");
}

Expand All @@ -499,9 +499,9 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
std::ostringstream maxtilestr;
maxtilestr << ov::intel_npu::max_tiles.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\d+"
<< VALUE_DELIMITER;
logger.warning("NPU_STEPPING property is not suppored by this compiler version. Removing from parameters");
logger.warning("NPU_STEPPING property is not supported by this compiler version. Removing from parameters");
content = std::regex_replace(content, std::regex(stepstr.str()), "");
logger.warning("NPU_MAX_TILES property is not suppored by this compiler version. Removing from parameters");
logger.warning("NPU_MAX_TILES property is not supported by this compiler version. Removing from parameters");
content = std::regex_replace(content, std::regex(maxtilestr.str()), "");
}

Expand All @@ -511,13 +511,13 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
precstr << ov::hint::inference_precision.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+"
<< VALUE_DELIMITER;
logger.warning(
"INFERENCE_PRECISION_HINT property is not suppored by this compiler version. Removing from parameters");
"INFERENCE_PRECISION_HINT property is not supported by this compiler version. Removing from parameters");
content = std::regex_replace(content, std::regex(precstr.str()), "");
}

/// Replacing NPU_TILES (for all versions) with NPU_DPU_GROUPS for backwards compatibility
if (std::regex_search(content, std::regex(ov::intel_npu::tiles.name()))) {
logger.warning("NPU_TILES property is not suppored by this compiler version. Swaping it to "
logger.warning("NPU_TILES property is not supported by this compiler version. Swaping it to "
"NPU_DPU_GROUPS (obsolete)");
content = std::regex_replace(content, std::regex(ov::intel_npu::tiles.name()), "NPU_DPU_GROUPS");
}
Expand All @@ -528,7 +528,7 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
batchstr << ov::intel_npu::batch_mode.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+"
<< VALUE_DELIMITER;

logger.warning("NPU_BATCH_MODE property is not suppored by this compiler version. Removing from parameters");
logger.warning("NPU_BATCH_MODE property is not supported by this compiler version. Removing from parameters");
content = std::regex_replace(content, std::regex(batchstr.str()), "");
}

Expand All @@ -538,10 +538,21 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
batchstr << ov::hint::execution_mode.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+"
<< VALUE_DELIMITER;
logger.warning(
"EXECUTION_MODE_HINT property is not suppored by this compiler version. Removing from parameters");
"EXECUTION_MODE_HINT property is not supported by this compiler version. Removing from parameters");
content = std::regex_replace(content, std::regex(batchstr.str()), "");
}

// COMPILER_DYNAMIC_QUANTIZATION is not supported in versions < 7.1 - need to remove it
if ((compilerVersion.major < 7) || (compilerVersion.major == 7 && compilerVersion.minor < 1)) {
std::ostringstream dqstr;
dqstr << ov::intel_npu::compiler_dynamic_quantization.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+"
<< VALUE_DELIMITER;
logger.warning(
"COMPILER_DYNAMIC_QUANTIZATION property is not supported by this compiler version. Removing from "
"parameters");
content = std::regex_replace(content, std::regex(dqstr.str()), "");
}

// NPU_DEFER_WEIGHTS_LOAD is needed at runtime only
{
std::ostringstream batchstr;
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/plugin/include/metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class Metrics final {
};
const std::vector<ov::PropertyName> _cachingProperties = {ov::device::architecture.name(),
ov::intel_npu::compilation_mode_params.name(),
ov::intel_npu::compiler_dynamic_quantization.name(),
ov::intel_npu::tiles.name(),
ov::intel_npu::dpu_groups.name(),
ov::intel_npu::dma_engines.name(),
Expand Down
8 changes: 6 additions & 2 deletions src/plugins/intel_npu/src/plugin/include/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,14 @@ class Plugin : public ov::IPlugin {
std::unique_ptr<Metrics> _metrics;

// properties map: {name -> [supported, mutable, eval function]}
std::map<std::string, std::tuple<bool, ov::PropertyMutability, std::function<ov::Any(const Config&)>>> _properties;
std::vector<ov::PropertyName> _supportedProperties;
mutable std::map<std::string, std::tuple<bool, ov::PropertyMutability, std::function<ov::Any(const Config&)>>>
_properties;
mutable std::vector<ov::PropertyName> _supportedProperties;

static std::atomic<int> _compiledModelLoadCounter;

void reset_compiler_dependent_properties() const;
void reset_supported_properties() const;
};

} // namespace intel_npu
6 changes: 6 additions & 0 deletions src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,12 @@ void CompiledModel::initialize_properties() {
[](const Config& config) {
return config.get<COMPILATION_MODE_PARAMS>();
}}},
{ov::intel_npu::compiler_dynamic_quantization.name(),
{true,
ov::PropertyMutability::RO,
[](const Config& config) {
return config.get<COMPILER_DYNAMIC_QUANTIZATION>();
}}},
{ov::intel_npu::turbo.name(),
{isPropertySupported(ov::intel_npu::turbo.name()),
ov::PropertyMutability::RO,
Expand Down
54 changes: 50 additions & 4 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@
#include <fstream>

#include "compiled_model.hpp"
#include "npuw/compiled_model.hpp"
#include "npuw/llm_compiled_model.hpp"
#include "npuw/serialization.hpp"
#include "driver_compiler_adapter.hpp"
#include "compiler_adapter_factory.hpp"
#include "driver_compiler_adapter.hpp"
#include "intel_npu/common/device_helpers.hpp"
#include "intel_npu/common/icompiler_adapter.hpp"
#include "intel_npu/common/igraph.hpp"
Expand All @@ -23,6 +20,8 @@
#include "intel_npu/utils/zero/zero_init.hpp"
#include "metadata.hpp"
#include "npuw/compiled_model.hpp"
#include "npuw/llm_compiled_model.hpp"
#include "npuw/serialization.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
Expand Down Expand Up @@ -450,6 +449,12 @@ Plugin::Plugin()
[](const Config& config) {
return config.get<COMPILATION_MODE_PARAMS>();
}}},
{ov::intel_npu::compiler_dynamic_quantization.name(),
{false,
ov::PropertyMutability::RW,
[](const Config& config) {
return config.get<COMPILER_DYNAMIC_QUANTIZATION>();
}}},
{ov::intel_npu::turbo.name(),
{_backends->isCommandQueueExtSupported(),
ov::PropertyMutability::RW,
Expand Down Expand Up @@ -566,24 +571,51 @@ Plugin::Plugin()
{ov::intel_npu::batch_mode.name(), {false, ov::PropertyMutability::RW, [](const Config& config) {
return config.getString<BATCH_MODE>();
}}}};
}

void Plugin::reset_supported_properties() const {
/// reset first
_supportedProperties.clear(); /// Mutable member
/// populate
for (auto& property : _properties) {
if (std::get<0>(property.second)) {
_supportedProperties.emplace_back(ov::PropertyName(property.first, std::get<1>(property.second)));
}
}
}

void Plugin::reset_compiler_dependent_properties() const {
// get active compiler version
CompilerAdapterFactory compilerAdapterFactory;
auto dummyCompiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), _globalConfig);
uint32_t active_compiler_version = dummyCompiler->get_version();

// NPU_COMPILER_DYNAMIC_QUANTIZATION
// unpublish if compiler version requirement is not met
if (_properties.find(ov::intel_npu::compiler_dynamic_quantization.name()) != _properties.end()) {
if (active_compiler_version >= ICOMPILER_MAKE_VERSION(7, 1)) {
std::get<0>(_properties[ov::intel_npu::compiler_dynamic_quantization.name()]) = true; /// mark supported
} else {
std::get<0>(_properties[ov::intel_npu::compiler_dynamic_quantization.name()]) = false; // mark unsupported
}
}
}

void Plugin::set_property(const ov::AnyMap& properties) {
const std::map<std::string, std::string> config = any_copy(properties);
update_log_level(config);
bool compiler_type_change = false;
for (const auto& configEntry : config) {
if (_properties.find(configEntry.first) == _properties.end()) {
OPENVINO_THROW("Unsupported configuration key: ", configEntry.first);
} else {
if (std::get<1>(_properties[configEntry.first]) == ov::PropertyMutability::RO) {
OPENVINO_THROW("READ-ONLY configuration key: ", configEntry.first);
}
if (configEntry.first == ov::intel_npu::compiler_type.name()) {
// we just assume its a change, not compare against old value
compiler_type_change = true;
}
}
}

Expand All @@ -595,12 +627,26 @@ void Plugin::set_property(const ov::AnyMap& properties) {
for (const auto& entry : config) {
_config[entry.first] = entry.second;
}

if (compiler_type_change) {
// if compiler type was changed > need to reset properties to match the new compiler
// since properties have changed > need to reset supported_properties as well
reset_compiler_dependent_properties();
reset_supported_properties();
}
}

ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& arguments) const {
const std::map<std::string, std::string>& amends = any_copy(arguments);
const Config amendedConfig = merge_configs(_globalConfig, amends);

/// Special case for supportedProperties
/// populate it at first get
if (name == ov::supported_properties.name() && _supportedProperties.size() < 1) {
reset_compiler_dependent_properties();
reset_supported_properties();
}

auto&& configIterator = _properties.find(name);
if (configIterator != _properties.cend()) {
return std::get<2>(configIterator->second)(amendedConfig);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ std::vector<std::pair<std::string, ov::Any>> plugin_public_mutable_properties =
std::vector<std::pair<std::string, ov::Any>> plugin_internal_mutable_properties = {
{ov::intel_npu::compilation_mode_params.name(), ov::Any("use-user-precision=false propagate-quant-dequant=0")},
{ov::intel_npu::dma_engines.name(), ov::Any(1)},
{ov::intel_npu::compiler_type.name(), ov::Any(ov::intel_npu::CompilerType::MLIR)},
{ov::intel_npu::platform.name(), ov::Any(ov::intel_npu::Platform::AUTO_DETECT)},
{ov::intel_npu::compilation_mode.name(), ov::Any("DefaultHW")},
{ov::intel_npu::max_tiles.name(), ov::Any(8)},
Expand Down
Loading