From 99a0295b21109b36ee44c87ac8b65d81d557351f Mon Sep 17 00:00:00 2001 From: csoka Date: Wed, 8 Jan 2025 14:55:22 +0200 Subject: [PATCH 1/9] [intel-npu] Adding NPU_DYNAMIC_QUANTIZATION property --- .../npu-device.rst | 1 + .../openvino/runtime/intel_npu/properties.hpp | 8 +++++++ src/plugins/intel_npu/README.md | 1 + .../al/include/intel_npu/config/compiler.hpp | 22 +++++++++++++++++++ .../src/driver_compiler_adapter.cpp | 10 +++++++++ .../intel_npu/src/plugin/include/metrics.hpp | 1 + .../src/plugin/src/compiled_model.cpp | 6 +++++ .../intel_npu/src/plugin/src/plugin.cpp | 6 +++++ 8 files changed, 55 insertions(+) diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index bbc6cbbc84d5d1..a3bdbfc7c2b7d1 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -143,6 +143,7 @@ offer a limited set of supported OpenVINO features. 
ov::enable_profiling ov::workload_type ov::intel_npu::compilation_mode_params + ov::intel_npu::compiler_dynamic_quantization ov::intel_npu::turbo ov::intel_npu::tiles ov::intel_npu::max_tiles diff --git a/src/inference/include/openvino/runtime/intel_npu/properties.hpp b/src/inference/include/openvino/runtime/intel_npu/properties.hpp index c5bf2331ad7dff..723a8b26f555d4 100644 --- a/src/inference/include/openvino/runtime/intel_npu/properties.hpp +++ b/src/inference/include/openvino/runtime/intel_npu/properties.hpp @@ -69,6 +69,14 @@ static constexpr ov::Property compiler_ver */ static constexpr ov::Property compilation_mode_params{"NPU_COMPILATION_MODE_PARAMS"}; +/** + * @brief [Only for NPU compiler] + * Type: boolean + * Set or verify state of dynamic quantization in the NPU compiler + * @ingroup ov_runtime_npu_prop_cpp_api + */ +static constexpr ov::Property compiler_dynamic_quantization{"NPU_COMPILER_DYNAMIC_QUANTIZATION"}; + /** * @brief [Only for NPU plugin] * Type: std::bool diff --git a/src/plugins/intel_npu/README.md b/src/plugins/intel_npu/README.md index b5da3bff6e1b47..e0601800abcb7d 100644 --- a/src/plugins/intel_npu/README.md +++ b/src/plugins/intel_npu/README.md @@ -173,6 +173,7 @@ The following properties are supported: | `ov::intel_npu::driver_version`/
`NPU_DRIVER_VERSION` | RO | NPU driver version. | `N/A` | `N/A` | | `ov::intel_npu::compiler_version`/
`NPU_COMPILER_VERSION` | RO | NPU compiler version. MSB 16 bits are Major version, LSB 16 bits are Minor version | `N/A` | `N/A` | | `ov::intel_npu::compilation_mode_params`/
`NPU_COMPILATION_MODE_PARAMS` | RW | Set various parameters supported by the NPU compiler. (See bellow) | ``| `N/A` | +| `ov::intel_npu::compiler_dynamic_quantization`/
`NPU_COMPILER_DYNAMIC_QUANTIZATION` | RW | Enable/disable dynamic quantization in the NPU compiler | `YES` / `NO` | `N/A` | | `ov::intel_npu::turbo`/
`NPU_TURBO` | RW | Set Turbo mode on/off | `YES`/ `NO`| `NO` | | `ov::intel_npu::tiles`/
`NPU_TILES` | RW | Sets the number of npu tiles to compile the model for | `[0-]` | `-1` | | `ov::intel_npu::max_tiles`/
`NPU_MAX_TILES` | RW | Maximum number of tiles supported by the device we compile for. Can be set for offline compilation. If not set, it will be populated by driver.| `[0-]` | `[1-6] depends on npu platform` | diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/compiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/compiler.hpp index 3e905a09757223..ba2767fa56065e 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/compiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/compiler.hpp @@ -357,4 +357,26 @@ struct COMPILATION_NUM_THREADS final : OptionBase { + static std::string_view key() { + return ov::intel_npu::compiler_dynamic_quantization.name(); + } + + static bool defaultValue() { + return false; + } + + static OptionMode mode() { + return OptionMode::CompileTime; + } + + static bool isPublic() { + return true; + } +}; + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 95ac5b1c10b0db..aea39ed05882f4 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -542,6 +542,16 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, content = std::regex_replace(content, std::regex(batchstr.str()), ""); } + // COMPILER_DYNAMIC_QUANTIZATION is not supported in versions < 6.1 - need to remove it + if ((compilerVersion.major < 6) || (compilerVersion.major == 6 && compilerVersion.minor < 3)) { + std::ostringstream dqstr; + dqstr << ov::intel_npu::compiler_dynamic_quantization.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" + << VALUE_DELIMITER; + logger.warning("COMPILER_DYNAMIC_QUANTIZATION property is not suppored by this compiler version. 
Removing from " + "parameters"); + content = std::regex_replace(content, std::regex(dqstr.str()), ""); + } + // NPU_DEFER_WEIGHTS_LOAD is needed at runtime only { std::ostringstream batchstr; diff --git a/src/plugins/intel_npu/src/plugin/include/metrics.hpp b/src/plugins/intel_npu/src/plugin/include/metrics.hpp index f3652d6d7add65..91f78a9cd773f6 100644 --- a/src/plugins/intel_npu/src/plugin/include/metrics.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metrics.hpp @@ -58,6 +58,7 @@ class Metrics final { }; const std::vector _cachingProperties = {ov::device::architecture.name(), ov::intel_npu::compilation_mode_params.name(), + ov::intel_npu::compiler_dynamic_quantization.name(), ov::intel_npu::tiles.name(), ov::intel_npu::dpu_groups.name(), ov::intel_npu::dma_engines.name(), diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 516518f6999cd3..c680e0a59ad0a8 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -264,6 +264,12 @@ void CompiledModel::initialize_properties() { [](const Config& config) { return config.get(); }}}, + {ov::intel_npu::compiler_dynamic_quantization.name(), + {true, + ov::PropertyMutability::RO, + [](const Config& config) { + return config.get(); + }}}, {ov::intel_npu::turbo.name(), {isPropertySupported(ov::intel_npu::turbo.name()), ov::PropertyMutability::RO, diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 5d5f666f5119ec..e7f6453ec92d1c 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -450,6 +450,12 @@ Plugin::Plugin() [](const Config& config) { return config.get(); }}}, + {ov::intel_npu::compiler_dynamic_quantization.name(), + {true, + ov::PropertyMutability::RW, + [](const Config& config) { + return config.get(); + }}}, 
{ov::intel_npu::turbo.name(), {_backends->isCommandQueueExtSupported(), ov::PropertyMutability::RW, From 24563e98cf82f14626a89310cb7e68058d0cdd45 Mon Sep 17 00:00:00 2001 From: csoka Date: Thu, 9 Jan 2025 16:25:48 +0200 Subject: [PATCH 2/9] [intel-npu] fixing typos in adapter prints. setting dq support to 6.4 --- .../src/driver_compiler_adapter.cpp | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index aea39ed05882f4..aa99cb7f6beb8a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -426,10 +426,10 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, optLevelStr << keyOfOptL << KEY_VALUE_SEPARATOR << "\\d+"; std::ostringstream perfHintStr; perfHintStr << keyOfPerfHO << KEY_VALUE_SEPARATOR << "\\S+"; - logger.warning("%s property is not suppored by this compiler version. Removing from parameters", + logger.warning("%s property is not supported by this compiler version. Removing from parameters", keyOfOptL.c_str()); valueOfParams = std::regex_replace(valueOfParams, std::regex(optLevelStr.str()), ""); - logger.warning("%s property is not suppored by this compiler version. Removing from parameters", + logger.warning("%s property is not supported by this compiler version. Removing from parameters", keyOfPerfHO.c_str()); valueOfParams = std::regex_replace(valueOfParams, std::regex(perfHintStr.str()), ""); @@ -487,7 +487,7 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, pinningstr << ov::hint::enable_cpu_pinning.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; logger.warning( - "ENABLE_CPU_PINNING property is not suppored by this compiler version. 
Removing from parameters"); + "ENABLE_CPU_PINNING property is not supported by this compiler version. Removing from parameters"); content = std::regex_replace(content, std::regex(pinningstr.str()), ""); } @@ -499,9 +499,9 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, std::ostringstream maxtilestr; maxtilestr << ov::intel_npu::max_tiles.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\d+" << VALUE_DELIMITER; - logger.warning("NPU_STEPPING property is not suppored by this compiler version. Removing from parameters"); + logger.warning("NPU_STEPPING property is not supported by this compiler version. Removing from parameters"); content = std::regex_replace(content, std::regex(stepstr.str()), ""); - logger.warning("NPU_MAX_TILES property is not suppored by this compiler version. Removing from parameters"); + logger.warning("NPU_MAX_TILES property is not supported by this compiler version. Removing from parameters"); content = std::regex_replace(content, std::regex(maxtilestr.str()), ""); } @@ -511,13 +511,13 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, precstr << ov::hint::inference_precision.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; logger.warning( - "INFERENCE_PRECISION_HINT property is not suppored by this compiler version. Removing from parameters"); + "INFERENCE_PRECISION_HINT property is not supported by this compiler version. Removing from parameters"); content = std::regex_replace(content, std::regex(precstr.str()), ""); } /// Replacing NPU_TILES (for all versions) with NPU_DPU_GROUPS for backwards compatibility if (std::regex_search(content, std::regex(ov::intel_npu::tiles.name()))) { - logger.warning("NPU_TILES property is not suppored by this compiler version. Swaping it to " + logger.warning("NPU_TILES property is not supported by this compiler version. 
Swaping it to " "NPU_DPU_GROUPS (obsolete)"); content = std::regex_replace(content, std::regex(ov::intel_npu::tiles.name()), "NPU_DPU_GROUPS"); } @@ -528,7 +528,7 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, batchstr << ov::intel_npu::batch_mode.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; - logger.warning("NPU_BATCH_MODE property is not suppored by this compiler version. Removing from parameters"); + logger.warning("NPU_BATCH_MODE property is not supported by this compiler version. Removing from parameters"); content = std::regex_replace(content, std::regex(batchstr.str()), ""); } @@ -538,17 +538,18 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, batchstr << ov::hint::execution_mode.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; logger.warning( - "EXECUTION_MODE_HINT property is not suppored by this compiler version. Removing from parameters"); + "EXECUTION_MODE_HINT property is not supported by this compiler version. Removing from parameters"); content = std::regex_replace(content, std::regex(batchstr.str()), ""); } - // COMPILER_DYNAMIC_QUANTIZATION is not supported in versions < 6.1 - need to remove it - if ((compilerVersion.major < 6) || (compilerVersion.major == 6 && compilerVersion.minor < 3)) { + // COMPILER_DYNAMIC_QUANTIZATION is not supported in versions < 6.4 - need to remove it + if ((compilerVersion.major < 6) || (compilerVersion.major == 6 && compilerVersion.minor < 4)) { std::ostringstream dqstr; dqstr << ov::intel_npu::compiler_dynamic_quantization.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; - logger.warning("COMPILER_DYNAMIC_QUANTIZATION property is not suppored by this compiler version. Removing from " - "parameters"); + logger.warning( + "COMPILER_DYNAMIC_QUANTIZATION property is not supported by this compiler version. 
Removing from " + "parameters"); content = std::regex_replace(content, std::regex(dqstr.str()), ""); } From 17a011c91fa12fb0fb6abf84889633d6fb0f297e Mon Sep 17 00:00:00 2001 From: csoka Date: Fri, 10 Jan 2025 14:36:00 +0200 Subject: [PATCH 3/9] [intel-npu] Adding python bindings for npu_dynamic_quantization --- .../python/src/pyopenvino/core/properties/properties.cpp | 1 + src/bindings/python/tests/test_runtime/test_properties.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 6f417b52716efd..8b1e64b0dd4d18 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -336,4 +336,5 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_intel_npu, ov::intel_npu::max_tiles, "max_tiles"); wrap_property_RW(m_intel_npu, ov::intel_npu::bypass_umd_caching, "bypass_umd_caching"); wrap_property_RW(m_intel_npu, ov::intel_npu::defer_weights_load, "defer_weights_load"); + wrap_property_RW(m_intel_npu, ov::intel_npu::compiler_dynamic_quantization, "compiler_dynamic_quantization"); } diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index fc726a0915a97d..447e668d3ad82d 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -452,6 +452,11 @@ def test_properties_ro(ov_property_ro, expected_value): "NPU_DEFER_WEIGHTS_LOAD", ((True, True),), ), + ( + intel_npu.compiler_dynamic_quantization, + "NPU_COMPILER_DYNAMIC_QUANTIZATION", + ((True, True),), + ), ], ) def test_properties_rw(ov_property_rw, expected_value, test_values): From 5d82cb90f89a1712f37e21ad7ae06af81a8d687b Mon Sep 17 00:00:00 2001 From: Csok Attila Date: Mon, 13 Jan 2025 14:29:52 +0000 Subject: [PATCH 4/9] 
[intel-npu] Register new property COMPILER_DYNAMIC_QUANTIZATION --- src/plugins/intel_npu/src/al/src/config/compiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/intel_npu/src/al/src/config/compiler.cpp b/src/plugins/intel_npu/src/al/src/config/compiler.cpp index 71d12147c816ce..7ccb0dff85905c 100644 --- a/src/plugins/intel_npu/src/al/src/config/compiler.cpp +++ b/src/plugins/intel_npu/src/al/src/config/compiler.cpp @@ -24,6 +24,7 @@ void intel_npu::registerCompilerOptions(OptionsDesc& desc) { desc.add(); desc.add(); desc.add(); + desc.add(); } // From 709b9dbd4a3265d34c86d1ceff2ca6fb3557eaa6 Mon Sep 17 00:00:00 2001 From: Csok Attila Date: Mon, 13 Jan 2025 17:26:46 +0000 Subject: [PATCH 5/9] [intel-npu] Adding internal helper function to compare for compiler-in-use's version; adding macro to encode compiler versions --- .../intel_npu/src/al/include/intel_npu/icompiler.hpp | 5 +++++ src/plugins/intel_npu/src/plugin/include/plugin.hpp | 2 ++ src/plugins/intel_npu/src/plugin/src/plugin.cpp | 9 +++++++++ 3 files changed, 16 insertions(+) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp index 5ed60641b8fa1e..5751eec3c544ae 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp @@ -12,6 +12,11 @@ namespace intel_npu { +#ifndef ICOMPILER_MAKE_VERSION +/// @brief Generates npu compiler (generic 'oneAPI') API version number +# define ICOMPILER_MAKE_VERSION(_major, _minor) ((_major << 16) | (_minor & 0x0000ffff)) +#endif // ICOMPILER_MAKE_VERSION + /** * @struct NetworkDescription * @brief The object returned by the compiler diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index ec78ab223d3f35..eeefcb9f5b4e1a 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ 
b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -65,6 +65,8 @@ class Plugin : public ov::IPlugin { std::vector _supportedProperties; static std::atomic _compiledModelLoadCounter; + + bool min_compiler_requirement(uint32_t compiler_version_requirement); }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index e7f6453ec92d1c..9c69a1826c5faf 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -855,6 +855,15 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return supportedOpsMap; } +bool Plugin::min_compiler_requirement(uint32_t compiler_version_requirement) { + /// Internal helper function to check if compiler_version_requirement param >= actual compiler version in use + /// create dummy compiler of from config + CompilerAdapterFactory compilerAdapterFactory; + auto dummyCompiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), _globalConfig); + uint32_t compiler_version_in_use = dummyCompiler->get_version(); + return (compiler_version_in_use >= compiler_version_requirement); +} + std::atomic Plugin::_compiledModelLoadCounter{1}; static const ov::Version version = {CI_BUILD_NUMBER, NPU_PLUGIN_LIB_NAME}; From 51e95c396b8a1249157ead9ee318713897943097 Mon Sep 17 00:00:00 2001 From: Csok Attila Date: Mon, 13 Jan 2025 17:27:38 +0000 Subject: [PATCH 6/9] [intel-npu] Condition NPU_COMPILER_DYNAMIC_QUANTIZATION to compiler version >= 6.4 --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 9c69a1826c5faf..0b99fa2d2a3358 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -451,7 +451,7 @@ Plugin::Plugin() return config.get(); }}}, 
{ov::intel_npu::compiler_dynamic_quantization.name(), - {true, + {min_compiler_requirement(ICOMPILER_MAKE_VERSION(6,4)), ov::PropertyMutability::RW, [](const Config& config) { return config.get(); From a4c9dceed3cf9d66fcceba3bdd45fe5912ff77cc Mon Sep 17 00:00:00 2001 From: csoka Date: Wed, 15 Jan 2025 11:24:05 +0200 Subject: [PATCH 7/9] [intel-npu] DQ available from compiler v7.1 --- .../src/compiler_adapter/src/driver_compiler_adapter.cpp | 4 ++-- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index aa99cb7f6beb8a..d7c4def10c8c93 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -542,8 +542,8 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, content = std::regex_replace(content, std::regex(batchstr.str()), ""); } - // COMPILER_DYNAMIC_QUANTIZATION is not supported in versions < 6.4 - need to remove it - if ((compilerVersion.major < 6) || (compilerVersion.major == 6 && compilerVersion.minor < 4)) { + // COMPILER_DYNAMIC_QUANTIZATION is not supported in versions < 7.1 - need to remove it + if ((compilerVersion.major < 7) || (compilerVersion.major == 7 && compilerVersion.minor < 1)) { std::ostringstream dqstr; dqstr << ov::intel_npu::compiler_dynamic_quantization.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 0b99fa2d2a3358..0755c0220d92d9 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -7,11 +7,8 @@ #include #include "compiled_model.hpp" -#include "npuw/compiled_model.hpp" 
-#include "npuw/llm_compiled_model.hpp" -#include "npuw/serialization.hpp" -#include "driver_compiler_adapter.hpp" #include "compiler_adapter_factory.hpp" +#include "driver_compiler_adapter.hpp" #include "intel_npu/common/device_helpers.hpp" #include "intel_npu/common/icompiler_adapter.hpp" #include "intel_npu/common/igraph.hpp" @@ -23,6 +20,8 @@ #include "intel_npu/utils/zero/zero_init.hpp" #include "metadata.hpp" #include "npuw/compiled_model.hpp" +#include "npuw/llm_compiled_model.hpp" +#include "npuw/serialization.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/runtime/intel_npu/properties.hpp" @@ -451,7 +450,7 @@ Plugin::Plugin() return config.get(); }}}, {ov::intel_npu::compiler_dynamic_quantization.name(), - {min_compiler_requirement(ICOMPILER_MAKE_VERSION(6,4)), + {min_compiler_requirement(ICOMPILER_MAKE_VERSION(7, 1)), ov::PropertyMutability::RW, [](const Config& config) { return config.get(); From fc10e3a778d5363bf2411c7b0ca9158994ab7293 Mon Sep 17 00:00:00 2001 From: csoka Date: Wed, 15 Jan 2025 19:08:55 +0200 Subject: [PATCH 8/9] [intel-npu] rework compiler version dependent properties --- .../intel_npu/src/plugin/include/plugin.hpp | 8 +-- .../intel_npu/src/plugin/src/plugin.cpp | 52 +++++++++++++++---- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index eeefcb9f5b4e1a..b13be5000513ec 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -61,12 +61,14 @@ class Plugin : public ov::IPlugin { std::unique_ptr _metrics; // properties map: {name -> [supported, mutable, eval function]} - std::map>> _properties; - std::vector _supportedProperties; + mutable std::map>> + _properties; + mutable std::vector _supportedProperties; static std::atomic _compiledModelLoadCounter; - bool min_compiler_requirement(uint32_t 
compiler_version_requirement); + void reset_compiler_dependent_properties() const; + void reset_supported_properties() const; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 0755c0220d92d9..a81ed843bca3f4 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -450,7 +450,7 @@ Plugin::Plugin() return config.get(); }}}, {ov::intel_npu::compiler_dynamic_quantization.name(), - {min_compiler_requirement(ICOMPILER_MAKE_VERSION(7, 1)), + {false, ov::PropertyMutability::RW, [](const Config& config) { return config.get(); @@ -571,7 +571,12 @@ Plugin::Plugin() {ov::intel_npu::batch_mode.name(), {false, ov::PropertyMutability::RW, [](const Config& config) { return config.getString(); }}}}; +} +void Plugin::reset_supported_properties() const { + /// reset first + _supportedProperties.clear(); /// Mutable member + /// populate for (auto& property : _properties) { if (std::get<0>(property.second)) { _supportedProperties.emplace_back(ov::PropertyName(property.first, std::get<1>(property.second))); @@ -579,9 +584,27 @@ Plugin::Plugin() } } +void Plugin::reset_compiler_dependent_properties() const { + // get active compiler version + CompilerAdapterFactory compilerAdapterFactory; + auto dummyCompiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), _globalConfig); + uint32_t active_compiler_version = dummyCompiler->get_version(); + + // NPU_COMPILER_DYNAMIC_QUANTIZATION + // unpublish if compiler version requirement is not met + if (_properties.find(ov::intel_npu::compiler_dynamic_quantization.name()) != _properties.end()) { + if (active_compiler_version >= ICOMPILER_MAKE_VERSION(7, 1)) { + std::get<0>(_properties[ov::intel_npu::compiler_dynamic_quantization.name()]) = true; /// mark supported + } else { + std::get<0>(_properties[ov::intel_npu::compiler_dynamic_quantization.name()]) = false; // mark 
unsupported + } + } +} + void Plugin::set_property(const ov::AnyMap& properties) { const std::map config = any_copy(properties); update_log_level(config); + bool compiler_type_change = false; for (const auto& configEntry : config) { if (_properties.find(configEntry.first) == _properties.end()) { OPENVINO_THROW("Unsupported configuration key: ", configEntry.first); @@ -589,6 +612,10 @@ void Plugin::set_property(const ov::AnyMap& properties) { if (std::get<1>(_properties[configEntry.first]) == ov::PropertyMutability::RO) { OPENVINO_THROW("READ-ONLY configuration key: ", configEntry.first); } + if (configEntry.first == ov::intel_npu::compiler_type.name()) { + // we just assume its a change, not compare against old value + compiler_type_change = true; + } } } @@ -600,12 +627,26 @@ void Plugin::set_property(const ov::AnyMap& properties) { for (const auto& entry : config) { _config[entry.first] = entry.second; } + + if (compiler_type_change) { + // if compiler type was changed > need to reset properties to match the new compiler + // since properties have changed > need to reset supported_properties as well + reset_compiler_dependent_properties(); + reset_supported_properties(); + } } ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& arguments) const { const std::map& amends = any_copy(arguments); const Config amendedConfig = merge_configs(_globalConfig, amends); + /// Special case for supportedProperties + /// populate it at first get + if (name == ov::supported_properties.name() && _supportedProperties.size() < 1) { + reset_compiler_dependent_properties(); + reset_supported_properties(); + } + auto&& configIterator = _properties.find(name); if (configIterator != _properties.cend()) { return std::get<2>(configIterator->second)(amendedConfig); @@ -854,15 +895,6 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return supportedOpsMap; } -bool Plugin::min_compiler_requirement(uint32_t compiler_version_requirement) { - /// Internal 
helper function to check if compiler_version_requirement param >= actual compiler version in use - /// create dummy compiler of from config - CompilerAdapterFactory compilerAdapterFactory; - auto dummyCompiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), _globalConfig); - uint32_t compiler_version_in_use = dummyCompiler->get_version(); - return (compiler_version_in_use >= compiler_version_requirement); -} - std::atomic Plugin::_compiledModelLoadCounter{1}; static const ov::Version version = {CI_BUILD_NUMBER, NPU_PLUGIN_LIB_NAME}; From 10eeff973288c62970e0b125051f907db54531cd Mon Sep 17 00:00:00 2001 From: csoka Date: Wed, 15 Jan 2025 23:32:49 +0200 Subject: [PATCH 9/9] [intel-npu] removing private property from property tests --- .../functional/internal/overload/compiled_model/property.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compiled_model/property.cpp b/src/plugins/intel_npu/tests/functional/internal/overload/compiled_model/property.cpp index 6ecc1886778b9d..6565c9bbb7a9c1 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compiled_model/property.cpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compiled_model/property.cpp @@ -123,7 +123,6 @@ std::vector> plugin_public_mutable_properties = std::vector> plugin_internal_mutable_properties = { {ov::intel_npu::compilation_mode_params.name(), ov::Any("use-user-precision=false propagate-quant-dequant=0")}, {ov::intel_npu::dma_engines.name(), ov::Any(1)}, - {ov::intel_npu::compiler_type.name(), ov::Any(ov::intel_npu::CompilerType::MLIR)}, {ov::intel_npu::platform.name(), ov::Any(ov::intel_npu::Platform::AUTO_DETECT)}, {ov::intel_npu::compilation_mode.name(), ov::Any("DefaultHW")}, {ov::intel_npu::max_tiles.name(), ov::Any(8)},