diff --git a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp index 76e6d60b8e3e90..b76a2595b8fe2f 100644 --- a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp +++ b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp @@ -18,6 +18,7 @@ namespace device { class ConvertOpSet1ToDeviceSpecific: public ov::pass::ModelPass { public: + OPENVINO_MODEL_PASS_RTTI("ConvertOpSet1ToDeviceSpecific"); bool run_on_model(const std::shared_ptr& f) override { return true; } @@ -96,7 +97,7 @@ if (useLpt) { // Low precision transformations plugin specific configuration: transformation callbacks definition lptManager.get_pass_config()->set_callback([](const std::shared_ptr& node) -> bool { - if (const auto multiply = std::dynamic_pointer_cast(node)) { + if (const auto multiply = ov::as_type_ptr(node)) { return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply); } return false; diff --git a/docs/articles_en/assets/snippets/ov_model_snippets.cpp b/docs/articles_en/assets/snippets/ov_model_snippets.cpp index 31ba0bc8028edd..b9c9a1155cc019 100644 --- a/docs/articles_en/assets/snippets/ov_model_snippets.cpp +++ b/docs/articles_en/assets/snippets/ov_model_snippets.cpp @@ -217,7 +217,7 @@ return true; // ! [ov:replace_node] bool ov_replace_node(std::shared_ptr node) { // Step 1. Verify that node is of type ov::op::v0::Negative - auto neg = std::dynamic_pointer_cast(node); + auto neg = ov::as_type_ptr(node); if (!neg) { return false; } @@ -238,7 +238,7 @@ bool ov_replace_node(std::shared_ptr node) { // ! [ov:replace_node] bool ov_manual_replace_node(std::shared_ptr node) { -auto neg = std::dynamic_pointer_cast(node); +auto neg = ov::as_type_ptr(node); if (!neg) { return false; } diff --git a/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp b/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp index 01170795dbea22..a5271d148190d0 100644 --- a/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp +++ b/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp @@ -99,7 +99,7 @@ void replace_non_reshapable_const() { for (const auto& node : model->get_ops()) { // Trying to find the problematic Constant by name. if (node->get_friendly_name() == "name_of_non_reshapable_const") { - auto const_with_hardcoded_shape = std::dynamic_pointer_cast(node); + auto const_with_hardcoded_shape = ov::as_type_ptr(node); // Replacing the problematic Constant with a new one. Do this for all the problematic Constants in the network, then // you can apply the reshape feature. ov::replace_node(const_with_hardcoded_shape, new_const); diff --git a/docs/articles_en/assets/snippets/template_model_transformation.hpp b/docs/articles_en/assets/snippets/template_model_transformation.hpp index de615f54acf06c..9eab5e3ac6ff94 100644 --- a/docs/articles_en/assets/snippets/template_model_transformation.hpp +++ b/docs/articles_en/assets/snippets/template_model_transformation.hpp @@ -18,7 +18,7 @@ class MyModelTransformation; // template_model_transformation.hpp class ov::pass::MyModelTransformation : public ov::pass::ModelPass { public: - OPENVINO_RTTI("MyModelTransformation", "0"); + OPENVINO_MODEL_PASS_RTTI("MyModelTransformation"); bool run_on_model(const std::shared_ptr& f) override; }; // ! 
[model_pass:template_transformation_hpp] diff --git a/docs/articles_en/assets/snippets/template_pattern_transformation.cpp b/docs/articles_en/assets/snippets/template_pattern_transformation.cpp index 408f7f72d94009..e7c6d7889e826e 100644 --- a/docs/articles_en/assets/snippets/template_pattern_transformation.cpp +++ b/docs/articles_en/assets/snippets/template_pattern_transformation.cpp @@ -23,7 +23,7 @@ ov::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() { auto div = std::make_shared(input0, input1); ov::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto div = std::dynamic_pointer_cast(m.get_match_root()); + auto div = ov::as_type_ptr(m.get_match_root()); // We can not apply this transformation in case with integer input data type if (!div || div->input(0).get_element_type().is_integral()) { return false; diff --git a/docs/articles_en/assets/snippets/template_pattern_transformation.hpp b/docs/articles_en/assets/snippets/template_pattern_transformation.hpp index e6e1fd27146363..2ec754c6161c3d 100644 --- a/docs/articles_en/assets/snippets/template_pattern_transformation.hpp +++ b/docs/articles_en/assets/snippets/template_pattern_transformation.hpp @@ -23,13 +23,13 @@ class ReluReluFusionMatcher; */ class ov::pass::DecomposeDivideMatcher : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("DecomposeDivideMatcher", "0"); + OPENVINO_MATCHER_PASS_RTTI("DecomposeDivideMatcher"); DecomposeDivideMatcher(); }; // ! [graph_rewrite:template_transformation_hpp] class ov::pass::ReluReluFusionMatcher : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("ReluReluFusionMatcher", "0"); + OPENVINO_MATCHER_PASS_RTTI("ReluReluFusionMatcher"); ReluReluFusionMatcher(); }; diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 0f7ac686866b3b..e98d4398cf2b8c 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -118,35 +118,48 @@ const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { namespace string_helpers { +namespace { +const char* find_last_not_null(const char* str, size_t length) { + return std::find_if(std::make_reverse_iterator(str + length), + std::make_reverse_iterator(str), + [](const auto& c) { + return c != '\0'; + }) + .base(); +} +} // namespace + py::array bytes_array_from_tensor(ov::Tensor&& t) { if (t.get_element_type() != ov::element::string) { OPENVINO_THROW("Tensor's type must be a string!"); } auto data = t.data(); + auto max_element = std::max_element(data, data + t.get_size(), [](const std::string& x, const std::string& y) { return x.length() < y.length(); }); auto max_stride = max_element->length(); auto dtype = py::dtype("|S" + std::to_string(max_stride)); + // Adjusting strides to follow the numpy convention: - py::array array; - auto new_strides = t.get_strides(); - if (new_strides.size() == 0) { - array = py::array(dtype, t.get_shape(), {}); - } else { - auto element_stride = new_strides[new_strides.size() - 1]; - for (size_t i = 0; i < new_strides.size(); ++i) { - new_strides[i] = (new_strides[i] / element_stride) * max_stride; + const auto py_array_strides = [&t, &max_stride] { + auto new_strides = t.get_strides(); + if (!new_strides.empty()) { + const auto& element_stride = new_strides.back(); + for (auto&& stride : new_strides) { + stride = (stride / element_stride) * max_stride; + } } - array = py::array(dtype, t.get_shape(), new_strides); - } + return new_strides; + }; + // Create an empty array 
and populate it with utf-8 encoded strings: - auto ptr = array.data(); + auto array = py::array(dtype, t.get_shape(), py_array_strides()); + auto ptr = reinterpret_cast(array.mutable_data()); for (size_t i = 0; i < t.get_size(); ++i) { - auto start = &data[i][0]; - auto length = data[i].length(); - auto end = std::copy(start, start + length, (char*)ptr + i * max_stride); - std::fill_n(end, max_stride - length, 0); + const auto length = data[i].length(); + ptr = std::copy_n(data[i].begin(), length, ptr); + ptr = std::fill_n(ptr, max_stride - length, '\0'); } return array; } @@ -169,14 +182,6 @@ py::array string_array_from_tensor(ov::Tensor&& t) { return array; } -static const char* find_first_not_null(const char* ptr, size_t itemsize) { - auto rbegin = std::make_reverse_iterator(ptr + itemsize); - auto first_not_null = std::find_if(rbegin, std::make_reverse_iterator(ptr), [](const auto& c) { - return c != '\0'; - }); - return first_not_null.base(); -} - void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array) { if (tensor.get_size() != static_cast(array.size())) { OPENVINO_THROW("Passed array must have the same size (number of elements) as the Tensor!"); @@ -185,7 +190,7 @@ void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array) { auto data = tensor.data(); for (size_t i = 0; i < tensor.get_size(); ++i) { const char* ptr = reinterpret_cast(buf.ptr) + (i * buf.itemsize); - auto first_not_null = find_first_not_null(ptr, buf.itemsize); + auto first_not_null = find_last_not_null(ptr, buf.itemsize); data[i] = std::string(ptr, first_not_null); } } @@ -194,18 +199,20 @@ void fill_tensor_from_strings(ov::Tensor& tensor, py::array& array) { if (tensor.get_size() != static_cast(array.size())) { OPENVINO_THROW("Passed array must have the same size (number of elements) as the Tensor!"); } - py::buffer_info buf = array.request(); + + const py::buffer_info buf = array.request(); auto data = tensor.data(); - for (size_t i = 0; i < tensor.get_size(); ++i) { - char* ptr = reinterpret_cast(buf.ptr) + (i * buf.itemsize); + + for (auto a_first = reinterpret_cast(buf.ptr), a_last = a_first + array.nbytes(); a_first < a_last; + a_first += array.itemsize(), ++data) { // TODO: check other unicode kinds? 2BYTE and 1BYTE? 
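+        // Conversion note for the replacement below: a numpy unicode ('U' kind) array stores
+        // fixed-width UCS-4 code points, so array.itemsize() / 4 is the number of code points
+        // per element and PyUnicode_4BYTE_KIND is the matching kind. The UTF-8 buffer returned
+        // by PyUnicode_AsUTF8AndSize is cached inside _unicode_obj, so only the unicode object
+        // has to be released (no separate bytes object as before); trailing '\0' padding coming
+        // from the fixed-width elements is trimmed with find_last_not_null.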
- PyObject* _unicode_obj = - PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, reinterpret_cast(ptr), buf.itemsize / 4); - PyObject* _utf8_obj = PyUnicode_AsUTF8String(_unicode_obj); - const char* _tmp_str = PyBytes_AsString(_utf8_obj); - data[i] = std::string(_tmp_str); + auto _unicode_obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, a_first, array.itemsize() / 4); + + Py_ssize_t utf8_size = 0; + const auto utf8_str = PyUnicode_AsUTF8AndSize(_unicode_obj, &utf8_size); + + *data = std::string(utf8_str, find_last_not_null(utf8_str, utf8_size)); Py_XDECREF(_unicode_obj); - Py_XDECREF(_utf8_obj); } } diff --git a/src/bindings/python/tests/test_runtime/test_tensor_string.py b/src/bindings/python/tests/test_runtime/test_tensor_string.py index f123f66a35987e..168a649b573ea5 100644 --- a/src/bindings/python/tests/test_runtime/test_tensor_string.py +++ b/src/bindings/python/tests/test_runtime/test_tensor_string.py @@ -75,9 +75,15 @@ def test_empty_string_tensor(init_type): (["text", "abc", "openvino"]), (["text", "больше текста", "jeszcze więcej słów", "효과가 있었어"]), ([["text"], ["abc"], ["openvino"]]), - ([["jeszcze więcej słów", "효과가 있었어"]]), - ], -) + ([["text"]]), + (["tex\u0000t\u0000tt"]), + ([["abĆ"]]), + ([["tex\u0000tttt"], ["abĆ"]]), + ([["jeszcze więcej słówe"], [u"효#과가 있었어"]]), + ([["jeszcze\u0000 więcej słówekó"]]), + ([["효과가 있었어"]]), + (["ab\u0000Ć"]), + ]) def test_init_with_list(string_data): tensor = ov.Tensor(string_data) assert tensor.element_type == ov.Type.string @@ -90,6 +96,25 @@ def test_init_with_list(string_data): check_string_based(tensor, _string_data) +def test_init_with_list_rare_real_scenario(): + input_data = ["tex\u0000\u0000ttt\u0000\u0000", "ab\u0000Ć"] + tensor = ov.Tensor(input_data) + assert tensor.element_type == ov.Type.string + # Convert to numpy to perform all checks. 
Memory is not shared, + np_string_data = np.array(input_data) + # Encoded: + check_bytes_based(tensor, np_string_data) + # Decoded: + str_tensor_data = tensor.str_data + assert str_tensor_data.shape == np_string_data.shape + # case when OV is not aligned with numpy format + # strides are different as trailing null characters are not stored in the tensor + # is rare to have any use of trailing null character in the string + assert str_tensor_data.strides != np_string_data.strides + assert np.array_equal(str_tensor_data, np_string_data) + assert not (np.shares_memory(str_tensor_data, np_string_data)) + + @pytest.mark.parametrize( ("string_data"), [ diff --git a/src/common/snippets/include/snippets/op/serialization_node.hpp b/src/common/snippets/include/snippets/op/serialization_node.hpp index 878c24bcf3f765..753e4c25e31b4d 100644 --- a/src/common/snippets/include/snippets/op/serialization_node.hpp +++ b/src/common/snippets/include/snippets/op/serialization_node.hpp @@ -19,6 +19,8 @@ namespace op { */ class SerializationNode : public ov::op::Op { public: + OPENVINO_OP("SerializationNode", "SnippetsOpset"); + enum SerializationMode { DATA_FLOW, CONTROL_FLOW }; SerializationNode() = default; SerializationNode(const ov::OutputVector& args, @@ -29,15 +31,6 @@ class SerializationNode : public ov::op::Op { std::shared_ptr clone_with_new_inputs(const OutputVector &new_args) const override; bool visit_attributes(AttributeVisitor &visitor) override; - _OPENVINO_HIDDEN_METHOD static const DiscreteTypeInfo& get_type_info_static() { - static ::ov::DiscreteTypeInfo type_info_static{"SerializationNode", "SnippetsOpset"}; - return type_info_static; - } - - const ::ov::DiscreteTypeInfo& get_type_info() const override { - return m_expr->get_node()->get_type_info(); - } - private: std::shared_ptr m_expr; SerializationMode m_mode; diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp index 9b5bd0600cbf0c..f282baf355d06e 100644 --- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp +++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp @@ -20,8 +20,10 @@ #include "openvino/op/scaled_dot_product_attention.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" #include "openvino/op/sqrt.hpp" #include "openvino/op/strided_slice.hpp" +#include "openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" #include "openvino/op/variadic_split.hpp" @@ -33,6 +35,143 @@ using namespace ov::op; using namespace ov::pass; using ov::OutputVector; +static std::tuple, std::shared_ptr> general_alibi_pattern() { + // Optional pattern to capture alibi slopes (based on pattern from bloom) + auto general_alibi = pattern::any_input(); + auto general_sdpa_mask = + pattern::wrap_type({pattern::any_input(), general_alibi}); // apply input position_ids + general_sdpa_mask = pattern::wrap_type({general_sdpa_mask, pattern::any_input()}); + general_sdpa_mask = pattern::wrap_type({general_sdpa_mask, pattern::any_input()}); + general_sdpa_mask = pattern::wrap_type({pattern::any_input(), pattern::any_input(), general_sdpa_mask}); + return {general_alibi, general_sdpa_mask}; +} + +static std::tuple, std::shared_ptr> jais_13b_alibi_pattern() { + auto jais_13b_alibi = pattern::any_input(); + auto 
mirroring_abs = pattern::wrap_type({pattern::any_input()}); + auto unsqueeze = pattern::wrap_type({mirroring_abs, pattern::any_input()}); + auto jais_alibi_mask = pattern::wrap_type({jais_13b_alibi, unsqueeze}); + jais_alibi_mask = pattern::wrap_type({jais_alibi_mask, pattern::any_input()}); + jais_alibi_mask = pattern::wrap_type({jais_alibi_mask, pattern::any_input()}); + jais_alibi_mask = pattern::wrap_type({pattern::any_input(), jais_alibi_mask}); + return {jais_13b_alibi, jais_alibi_mask}; +} + +static std::tuple, std::shared_ptr> baichuan2_13b_alibi_pattern() { + auto baichuan2_alibi = pattern::any_input(); + // this slice expected to be replaced with Slice(alibi_const, start {1, 1}, stop {2, 2}, step {1, 1}, axes{1, 2}); + auto alibi_slice_to_replace = pattern::wrap_type( + {baichuan2_alibi, pattern::any_input(), pattern::any_input(), pattern::any_input(), pattern::any_input()}); + auto alibi_path = pattern::wrap_type({alibi_slice_to_replace}); + alibi_path = pattern::wrap_type({alibi_path, pattern::any_input(), pattern::any_input()}); + alibi_path = pattern::wrap_type({pattern::any_input(), pattern::any_input(), alibi_path}); + alibi_path = pattern::wrap_type({pattern::any_input(), alibi_path}); + alibi_path = pattern::wrap_type({alibi_path}); + alibi_path = pattern::wrap_type({alibi_path, pattern::any_input()}); + alibi_path = pattern::wrap_type({pattern::any_input(), alibi_path}); + alibi_path = pattern::wrap_type({pattern::any_input(), pattern::any_input(), alibi_path}); + auto alibi_unsqueeze = pattern::wrap_type({alibi_slice_to_replace, pattern::any_input()}); + alibi_path = pattern::wrap_type({alibi_path, alibi_unsqueeze}); + auto mul = pattern::wrap_type({pattern::any_input(), pattern::any_input()}); + alibi_path = pattern::wrap_type( + {alibi_path, mul, pattern::any_input(), pattern::any_input(), pattern::any_input()}); + return {baichuan2_alibi, alibi_path}; +} + +static std::shared_ptr handle_general_alibi(const std::shared_ptr& matched_general_alibi_slopes) { + std::shared_ptr res_alibi_slopes = + std::make_shared(matched_general_alibi_slopes, + v0::Constant::create(ov::element::i64, ov::Shape{1}, {-1}), + false); + if (res_alibi_slopes->get_element_type() != ov::element::f32) { + res_alibi_slopes = std::make_shared(res_alibi_slopes, ov::element::f32); + } + + return res_alibi_slopes; +} + +static std::shared_ptr handle_jais_13b_alibi(const std::shared_ptr& matched_jais_13b_alibi_slopes) { + // At the beginning, handling of jais13's alibi is the same as the general case + std::shared_ptr res_alibi_slopes = handle_general_alibi(matched_jais_13b_alibi_slopes); + + // For now there's no such case with Alibi slopes being not a Constant, + // however that may change in the future. That is why the presence of + // Abs is the main sign of the Jais-like topology, thus we need to multiply + // by -1. If we encounter the Alibi being a constant, we may do the additional + // checking of the values to be negative and, if it fails, we won't multiply + // the values by -1. 
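+    // In short: if the slopes are a Constant, negate them only when all values are negative;
+    // for any other producer, the matched Abs implies a Jais-like topology, so negate unconditionally.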
+ if (auto alibi_constant = ov::as_type_ptr(matched_jais_13b_alibi_slopes)) { + auto alibi_constant_values = alibi_constant->cast_vector(); + bool all_values_nagative = + std::all_of(alibi_constant_values.begin(), alibi_constant_values.end(), [&](float value) { + return value < 0.0; + }); + + if (all_values_nagative) { + res_alibi_slopes = + std::make_shared(res_alibi_slopes, + v0::Constant::create(res_alibi_slopes->get_element_type(), {}, {-1})); + } + } else { + res_alibi_slopes = + std::make_shared(res_alibi_slopes, + v0::Constant::create(res_alibi_slopes->get_element_type(), {}, {-1})); + } + + return res_alibi_slopes; +} + +static std::shared_ptr handle_baichuan2_13b_alibi( + /* >>> alibi = np.reshape(alibi, (40, 4096, 4096)) + >>> print(alibi[0][:][:]) + [['0' '-inf' '-inf' ... '-inf' '-inf' '-inf'] + ['0' '0.839844' '-inf' ... '-inf' '-inf' '-inf'] + ['0' '0.839844' '1.67969' ... '-inf' '-inf' '-inf'] + ... + ['0' '0.839844' '1.67969' ... '3440' '-inf' '-inf'] + ['0' '0.839844' '1.67969' ... '3440' '3440' '-inf'] + ['0' '0.839844' '1.67969' ... '3440' '3440' '3440']] + >>> print(alibi[1][:][:]) + [['0' '-inf' '-inf' ... '-inf' '-inf' '-inf'] + ['0' '0.707031' '-inf' ... '-inf' '-inf' '-inf'] + ['0' '0.707031' '1.41406' ... '-inf' '-inf' '-inf'] + ... + ['0' '0.707031' '1.41406' ... '2896' '-inf' '-inf'] + ['0' '0.707031' '1.41406' ... '2896' '2896' '-inf'] + ['0' '0.707031' '1.41406' ... '2896' '2896' '2896']] + + etc. + + Slicing from {1, 1} to {2, 2} gives us the expected alibi slope constant to pass it to PagedAttention: + >>> print(alibi[0][1][1]) + 0.839844 + >>> print(line1[1][1][1]) + 0.707031 + + ALibi slopes constant's shape is [40, 4096, 4096] + Slicing means that we take only 1 value from each 4096 x 4096 matrix here + The resulting constant will be [40, 1, 1] + After that we need to insert Reshape to get the expected rank = 1 (shape [40]) + */ + const std::shared_ptr& matched_baichuan2_13b_alibi_slopes) { + std::shared_ptr res_alibi_slopes = matched_baichuan2_13b_alibi_slopes; + + auto start = v0::Constant::create(ov::element::i64, ov::Shape{2}, {1, 1}); + auto stop = v0::Constant::create(ov::element::i64, ov::Shape{2}, {2, 2}); + auto step = v0::Constant::create(ov::element::i64, ov::Shape{2}, {1, 1}); + auto axes = v0::Constant::create(ov::element::i64, ov::Shape{2}, {1, 2}); + // the Slice to extract the correct values + res_alibi_slopes = std::make_shared(res_alibi_slopes, start, stop, step, axes); + res_alibi_slopes = std::make_shared(res_alibi_slopes, + v0::Constant::create(ov::element::i64, ov::Shape{1}, {-1}), + false); + if (res_alibi_slopes->get_element_type() != ov::element::f32) { + res_alibi_slopes = std::make_shared(res_alibi_slopes, ov::element::f32); + } + + return res_alibi_slopes; +} + // Exactly copied the function from another file. 
Maybe should be moved to some general file static std::shared_ptr setName(std::shared_ptr node, const std::string& name) { // Set name for both node and output tensor (should be only one tensor, and any other names will be overriden by a @@ -146,19 +285,16 @@ ov::pass::StateManagementPattern::StateManagementPattern(ParameterVector& kv_par {std::make_shared(OutputVector{v_concat, v_shaped}), v_order}); // Optional pattern to capture alibi slopes (based on pattern from bloom) - auto alibi = pattern::any_input(); - auto sdpa_mask = pattern::wrap_type({pattern::any_input(), alibi}); // apply input position_ids - sdpa_mask = pattern::wrap_type({sdpa_mask, pattern::any_input()}); - sdpa_mask = pattern::wrap_type({sdpa_mask, pattern::any_input()}); - sdpa_mask = pattern::wrap_type({pattern::any_input(), pattern::any_input(), sdpa_mask}); + std::shared_ptr general_alibi, general_alibi_mask; + std::tie(general_alibi, general_alibi_mask) = general_alibi_pattern(); // For Jais (Jais-13b has a different pattern and handling of alibi slopes) - auto mirroring_abs = pattern::wrap_type({pattern::any_input()}); - auto unsqueeze = pattern::wrap_type({mirroring_abs, pattern::any_input()}); - auto alibi_mask = pattern::wrap_type({alibi, unsqueeze}); - alibi_mask = pattern::wrap_type({alibi_mask, pattern::any_input()}); - alibi_mask = pattern::wrap_type({alibi_mask, pattern::any_input()}); - alibi_mask = pattern::wrap_type({pattern::any_input(), alibi_mask}); + std::shared_ptr jais_13b_alibi, jais_alibi_mask; + std::tie(jais_13b_alibi, jais_alibi_mask) = jais_13b_alibi_pattern(); + + // Baichuan2 13b case + std::shared_ptr baichuan2_13b_alibi, baichuan2_13b_alibi_mask; + std::tie(baichuan2_13b_alibi, baichuan2_13b_alibi_mask) = baichuan2_13b_alibi_pattern(); auto q = pattern::any_input(); auto scale_input = pattern::any_input(); @@ -167,7 +303,8 @@ ov::pass::StateManagementPattern::StateManagementPattern(ParameterVector& kv_par std::make_shared(OutputVector{k_concat, k_shaped, k_shaped_transposed, k_simply_shaped}); auto v_to_sdpa = std::make_shared(OutputVector{v_concat, v_shaped, v_shaped_transposed, v_simply_shaped}); - auto mask_to_sdpa = std::make_shared(OutputVector{sdpa_mask, alibi_mask, pattern::any_input()}); + auto mask_to_sdpa = std::make_shared( + OutputVector{general_alibi_mask, jais_alibi_mask, baichuan2_13b_alibi_mask, pattern::any_input()}); auto sdpa_with_4_inputs = pattern::wrap_type({q, k_to_sdpa, v_to_sdpa, mask_to_sdpa}); @@ -342,41 +479,12 @@ ov::pass::StateManagementPattern::StateManagementPattern(ParameterVector& kv_par } std::shared_ptr alibi_slopes; - if (pattern_map.find(alibi) != pattern_map.end()) { - alibi_slopes = std::make_shared(pattern_map.at(alibi), - v0::Constant::create(element::i64, Shape{1}, {-1}), - false); - if (alibi_slopes->get_element_type() == element::f32) { - alibi_slopes = std::make_shared(alibi_slopes, element::f32); - } - - // Jais-13b case - if (pattern_map.find(mirroring_abs) != pattern_map.end()) { - // For now there's no such case with Alibi slopes being not a Constant, - // however that may change in the future. That is why the presence of - // Abs is the main sign of the Jais-like topology, thus we need to multiply - // by -1. If we encounter the Alibi being a constant, we may do the additional - // checking of the values to be negative and, if it fails, we won't multiply - // the values by -1. 
- if (auto alibi_constant = ov::as_type_ptr(pattern_map.at(alibi).get_node_shared_ptr())) { - auto alibi_constant_values = alibi_constant->cast_vector(); - bool all_values_nagative = - std::all_of(alibi_constant_values.begin(), alibi_constant_values.end(), [&](float value) { - return value < 0.0; - }); - - if (all_values_nagative) { - alibi_slopes = std::make_shared( - alibi_slopes, - v0::Constant::create(alibi_slopes->get_element_type(), {}, {-1})); - } - } else { - alibi_slopes = std::make_shared( - alibi_slopes, - v0::Constant::create(alibi_slopes->get_element_type(), {}, {-1})); - } - } - + if (pattern_map.find(general_alibi) != pattern_map.end()) { + alibi_slopes = handle_general_alibi(pattern_map.at(general_alibi).get_node_shared_ptr()); + } else if (pattern_map.find(jais_13b_alibi) != pattern_map.end()) { + alibi_slopes = handle_jais_13b_alibi(pattern_map.at(jais_13b_alibi).get_node_shared_ptr()); + } else if (pattern_map.find(baichuan2_13b_alibi) != pattern_map.end()) { + alibi_slopes = handle_baichuan2_13b_alibi(pattern_map.at(baichuan2_13b_alibi).get_node_shared_ptr()); } else { alibi_slopes = v0::Constant::create(element::f32, Shape{0}, {}); } diff --git a/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp index d4dca147b31b3b..c703b84429805a 100644 --- a/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp +++ b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp @@ -29,7 +29,9 @@ #include "openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "openvino/pass/visualize_tree.hpp" #include "transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp" +#include "transformations/sdpa_to_paged_attention/state_management_pattern.hpp" #include "transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp" #include "transformations/utils/gen_pattern.hpp" #include "transformations/utils/print_model.hpp" @@ -616,3 +618,267 @@ TEST_F(TransformationTestsF, SDPAToPA_TotalSequenceLengthPatternQwen) { disable_result_friendly_names_check(); disable_rt_info_check(); } + +static std::shared_ptr make_param(const PartialShape& pshape, + element::Type element_type, + const std::string& name) { + auto param = makeOP({}, {{"shape", pshape}, {"element_type", element_type}}); + param->set_friendly_name(name); + param->get_output_tensor(0).set_names({name}); + return param; +} + +// TODO: split the models in blocks the way it's done for Qwen and make the code not to be such a clutter +// TODO: write a test for StateManagementPattern only (because changes for Alibi are inside it) +// TODO: align precisions, check the copying of "fuse_names" attr in SDPAToPagedAttention +// checking the graph structure and names, other checks are temporarily disabled: +TEST_F(TransformationTestsF, SDPAToPA_Baichuan2_13b_general_test) { + { + auto beam_idx = make_param(PartialShape{DYN}, element::i32, "beam_idx"); + auto position_ids = make_param(PartialShape{DYN, DYN}, element::i64, "position_ids"); + auto attention_mask = make_param(PartialShape{DYN, DYN}, element::i64, "attention_mask"); + auto input_ids = make_param(PartialShape{DYN, DYN}, element::i64, "input_ids"); + + // gen_embeddings() { + auto ShapeOf5 = makeOP({beam_idx}, {{"output_type", "i64"}}); + auto Gather8 = makeOP({ShapeOf5, {0ll}, 0ll}, {{"batch_dims", 0}}); + auto Concat12 = makeOP({Gather8, {40ll}, {0ll}, 
{128ll}}, {{"axis", 0}}); + auto Broadcast13 = makeOP({0.0f, Concat12}, {{"mode", "numpy"}}); + auto Constant18 = makeConst(element::u8, ov::Shape({125696, 5120}), MOCK_VALUE); + auto Convert19 = makeOP({Constant18}, {{"destination_type", "f16"}}); + auto Constant20 = makeConst(element::u8, ov::Shape({125696, 1}), MOCK_VALUE); + auto Convert21 = makeOP({Constant20}, {{"destination_type", "f16"}}); + auto Subtract22 = makeOP({Convert19, Convert21}, {{"auto_broadcast", "numpy"}}); + auto Constant23 = makeConst(element::f16, ov::Shape({125696, 1}), MOCK_VALUE); + auto Multiply24 = makeOP({Subtract22, Constant23}, {{"auto_broadcast", "numpy"}}); + auto Convert25 = makeOP({Multiply24}, {{"destination_type", "f32"}}); + auto Convert26 = makeOP({input_ids}, {{"destination_type", "i32"}}); + auto Gather28 = makeOP({Convert25, Convert26, 0}, {{"batch_dims", 0}}); + //} + + auto Constant29 = makeConst(element::f32, ov::Shape({1, 1, 5120}), MOCK_VALUE); + auto Constant30 = makeConst(element::f32, ov::Shape({1, 1, 1}), {1.0f}); + auto Constant31 = makeConst(element::f32, ov::Shape({1, 1, 1}), {2.0f}); + auto Power32 = makeOP({Gather28, Constant31}, {{"auto_broadcast", "numpy"}}); + auto ReduceMean34 = makeOP({Power32, {-1}}, {{"keep_dims", true}}); + auto Constant35 = makeConst(element::f32, ov::Shape({1, 1, 1}), {0.000001f}); + auto Add36 = makeOP({ReduceMean34, Constant35}, {{"auto_broadcast", "numpy"}}); + auto Sqrt37 = makeOP({Add36}); + auto Divide38 = + makeOP({Constant30, Sqrt37}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); + auto Multiply39 = makeOP({Gather28, Divide38}, {{"auto_broadcast", "numpy"}}); + auto Multiply40 = makeOP({Constant29, Multiply39}, {{"auto_broadcast", "numpy"}}); + + // gen_attention_weights() { + auto Constant41 = makeConst(element::u8, ov::Shape({15360, 5120}), MOCK_VALUE); + auto Convert42 = makeOP({Constant41}, {{"destination_type", "f16"}}); + auto Constant43 = makeConst(element::u8, ov::Shape({15360, 1}), MOCK_VALUE); + auto Convert44 = makeOP({Constant43}, {{"destination_type", "f16"}}); + auto Subtract45 = makeOP({Convert42, Convert44}, {{"auto_broadcast", "numpy"}}); + auto Constant46 = makeConst(element::f16, ov::Shape({15360, 1}), MOCK_VALUE); + auto Multiply47 = makeOP({Subtract45, Constant46}, {{"auto_broadcast", "numpy"}}); + auto Convert48 = makeOP({Multiply47}, {{"destination_type", "f32"}}); + //} + + auto MatMul49 = + makeOP({Multiply40, Convert48}, {{"transpose_a", false}, {"transpose_b", true}}); + auto Reshape51 = makeOP({MatMul49, {0, 0, 3, 5120}}, {{"special_zero", true}}); + auto Unsqueeze53 = makeOP({Reshape51, 0}); + auto Squeeze55 = makeOP({Unsqueeze53, {0}}); + auto Transpose57 = makeOP({Squeeze55, {2, 0, 1, 3}}); + + // Q + auto Gather58 = makeOP({Transpose57, 0, 0}, {{"batch_dims", 0}}); + auto Reshape60 = makeOP({Gather58, {0, 0, 40, 128}}, {{"special_zero", true}}); + auto Transpose62 = makeOP({Reshape60, {0, 2, 1, 3}}); + + auto ReadValue63 = makeOP({Broadcast13}, + {{"variable_id", "varid_2"}, + {"variable_type", "f32"}, + {"variable_shape", PartialShape{DYN, 40, DYN, 128}}}); + auto Gather65 = makeOP({ReadValue63, beam_idx, 0}, {{"batch_dims", 0}}); + + // K + auto Gather67 = makeOP({Transpose57, 1, 0}, {{"batch_dims", 0}}); + auto Reshape69 = makeOP({Gather67, {0, 0, 40, 128}}, {{"special_zero", true}}); + auto Transpose71 = makeOP({Reshape69, {0, 2, 1, 3}}); + auto Concat72 = makeOP({Gather65, Transpose71}, {{"axis", 2}}); + + auto ReadValue73 = makeOP({Broadcast13}, + {{"variable_id", "varid_3"}, + {"variable_type", 
"f32"}, + {"variable_shape", PartialShape{DYN, 40, DYN, 128}}}); + auto Gather75 = makeOP({ReadValue73, beam_idx, 0}, {{"batch_dims", 0}}); + + // V + auto Gather77 = makeOP({Transpose57, 2, 0}, {{"batch_dims", 0}}); + auto Reshape79 = makeOP({Gather77, {0, 0, 40, 128}}, {{"special_zero", true}}); + auto Transpose81 = makeOP({Reshape79, {0, 2, 1, 3}}); + auto Concat82 = makeOP({Gather75, Transpose81}, {{"axis", 2}}); + + auto Constant83 = makeConst(element::f32, ov::Shape({1, 1, 1, 1}), {1.000000f}); + auto Convert85 = makeOP({attention_mask}, {{"destination_type", "f32"}}); + auto Unsqueeze86 = makeOP({Convert85, 2}); + auto Unsqueeze87 = makeOP({Convert85, 1}); + auto Multiply88 = makeOP({Unsqueeze86, Unsqueeze87}, {{"auto_broadcast", "numpy"}}); + auto Constant89 = makeConst(element::f32, ov::Shape({1, 1, 1}), {0.000000f}); + auto Greater90 = makeOP({Multiply88, Constant89}, {{"auto_broadcast", "numpy"}}); + auto ShapeOf91 = makeOP({Greater90}, {{"output_type", "i32"}}); + auto Gather94 = makeOP({ShapeOf91, 1, 0}, {{"batch_dims", 0}}); + auto Range96 = makeOP({0, Gather94, 1}, {{"output_type", "i32"}}); + auto Unsqueeze97 = makeOP({Range96, 0}); + auto Unsqueeze98 = makeOP({Range96, 1}); + auto LessEqual99 = makeOP({Unsqueeze97, Unsqueeze98}, {{"auto_broadcast", "numpy"}}); + auto Constant100 = makeConst(element::boolean, ov::Shape({}), {0}); + auto Select101 = makeOP({LessEqual99, Greater90, Constant100}, {{"auto_broadcast", "numpy"}}); + auto Subtract102 = makeOP({Unsqueeze86, Unsqueeze87}, {{"auto_broadcast", "numpy"}}); + auto Constant103 = makeConst(element::f32, ov::Shape({1, 1, 1}), {0.000000f}); + auto Equal104 = makeOP({Subtract102, Constant103}, {{"auto_broadcast", "numpy"}}); + auto LogicalAnd105 = makeOP({Select101, Equal104}, {{"auto_broadcast", "numpy"}}); + auto Unsqueeze106 = makeOP({LogicalAnd105, 1}); + auto ShapeOf107 = makeOP({MatMul49}, {{"output_type", "i64"}}); + auto Gather110 = makeOP({ShapeOf107, {0}, 0}, {{"batch_dims", 0}}); + auto Constant112 = makeConst(element::f32, + ov::Shape({40, 4096, 4096}), + MOCK_VALUE); // TODO: there can be an error due to fake alibi slopes + auto Gather116 = makeOP({ShapeOf107, {1}, 0}, {{"batch_dims", 0}}); + auto ShapeOf117 = makeOP({Gather65}, {{"output_type", "i64"}}); + auto Gather120 = makeOP({ShapeOf117, {2}, 0}, {{"batch_dims", 0}}); + auto Add121 = makeOP({Gather116, Gather120}, {{"auto_broadcast", "numpy"}}); + auto Broadcast123 = makeOP({Add121, {2}}, {{"mode", "numpy"}}); + auto Slice126 = + makeOP({Constant112, {0, 0}, Broadcast123, {1, 1}, {1, 2}}); // the very slice we insert + auto ShapeOf127 = makeOP({Slice126}, {{"output_type", "i64"}}); + auto Gather130 = makeOP({ShapeOf127, {1, 2}, 0}, {{"batch_dims", 0}}); + auto Concat131 = makeOP({Gather110, {1L}, Gather130}, {{"axis", 0}}); + auto Broadcast132 = makeOP({Unsqueeze106, Concat131}, {{"mode", "bidirectional"}}); + auto Convert133 = makeOP({Broadcast132}, {{"destination_type", "f32"}}); + auto Constant134 = makeConst(element::f32, ov::Shape({1, 1, 1, 1}), {1.000000f}); + auto Multiply135 = makeOP({Convert133, Constant134}, {{"auto_broadcast", "numpy"}}); + auto Subtract136 = makeOP({Constant83, Multiply135}, {{"auto_broadcast", "numpy"}}); + auto Convert137 = makeOP({Subtract136}, {{"destination_type", "boolean"}}); + auto Select139 = makeOP({Convert137, -FLT_MAX, Subtract136}, {{"auto_broadcast", "numpy"}}); + auto Unsqueeze140 = makeOP({Slice126, 0}); + auto Add141 = makeOP({Select139, Unsqueeze140}, {{"auto_broadcast", "numpy"}}); + auto Multiply143 = 
makeOP({Gather116, {-1l}}, {{"auto_broadcast", "numpy"}}); + auto Slice147 = makeOP({Add141, Multiply143, {LLONG_MAX}, {1}, {2}}); + auto sdpa = + makeOP({Transpose62, Concat72, Concat82, Slice147}, {{"causal", false}}); + + auto res = makeOP({sdpa}); + + ParameterVector params = nodes_to_params({beam_idx, position_ids, attention_mask, input_ids}); + model = std::make_shared(OutputVector{res}, params); + + manager.register_pass(); + } + + { + auto max_context_len = make_param(PartialShape{}, element::i32, "max_context_len"); + auto block_indices_begins = make_param(PartialShape{DYN}, element::i32, "block_indices_begins"); + auto block_indices = make_param(PartialShape{DYN}, element::i32, "block_indices"); + auto subsequence_begins = make_param(PartialShape{DYN}, element::i32, "subsequence_begins"); + auto past_lens = make_param(PartialShape{DYN}, element::i32, "past_lens"); + auto value_cache_0 = make_param(PartialShape{DYN, 40, 128}, element::f32, "value_cache.0"); + auto key_cache_0 = make_param(PartialShape{DYN, 40, 128}, element::f32, "key_cache.0"); + auto input_ids = make_param(PartialShape{DYN}, element::i64, "input_ids"); + + ParameterVector params = nodes_to_params({max_context_len, + block_indices_begins, + block_indices, + subsequence_begins, + past_lens, + value_cache_0, + key_cache_0, + input_ids}); + + auto Constant88 = makeConst(element::u8, ov::Shape({125696, 5120}), MOCK_VALUE); + auto Convert89 = makeOP({Constant88}, {{"destination_type", "f16"}}); + auto Constant90 = makeConst(element::u8, ov::Shape({125696, 1}), MOCK_VALUE); + auto Convert91 = makeOP({Constant90}, {{"destination_type", "f16"}}); + auto Subtract92 = makeOP({Convert89, Convert91}, {{"auto_broadcast", "numpy"}}); + auto Constant93 = makeConst(element::f16, ov::Shape({125696, 1}), MOCK_VALUE); + auto Multiply94 = makeOP({Subtract92, Constant93}, {{"auto_broadcast", "numpy"}}); + auto Convert95 = makeOP({Multiply94}, {{"destination_type", "f32"}}); + auto Unsqueeze97 = makeOP({input_ids, 1}); + auto Convert98 = makeOP({Unsqueeze97}, {{"destination_type", "i32"}}); + auto Gather100 = makeOP({Convert95, Convert98, 0}, {{"batch_dims", 0}}); + auto Constant101 = makeConst(element::f32, ov::Shape({1, 1, 5120}), MOCK_VALUE); + auto Constant102 = makeConst(element::f32, ov::Shape({1, 1, 1}), {1.0f}); + auto Constant103 = makeConst(element::f32, ov::Shape({1, 1, 1}), {2.0f}); + auto Power104 = makeOP({Gather100, Constant103}, {{"auto_broadcast", "numpy"}}); + auto ReduceMean106 = makeOP({Power104, {-1}}, {{"keep_dims", true}}); + auto Constant107 = makeConst(element::f32, ov::Shape({1, 1, 1}), {0.000001f}); + auto Add108 = makeOP({ReduceMean106, Constant107}, {{"auto_broadcast", "numpy"}}); + auto Sqrt109 = makeOP({Add108}); + auto Divide110 = + makeOP({Constant102, Sqrt109}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); + auto Multiply111 = makeOP({Gather100, Divide110}, {{"auto_broadcast", "numpy"}}); + auto Multiply112 = makeOP({Constant101, Multiply111}, {{"auto_broadcast", "numpy"}}); + auto Constant113 = makeConst(element::u8, ov::Shape({15360, 5120}), MOCK_VALUE); + auto Convert114 = makeOP({Constant113}, {{"destination_type", "f16"}}); + auto Constant115 = makeConst(element::u8, ov::Shape({15360, 1}), MOCK_VALUE); + auto Convert116 = makeOP({Constant115}, {{"destination_type", "f16"}}); + auto Subtract117 = makeOP({Convert114, Convert116}, {{"auto_broadcast", "numpy"}}); + auto Constant118 = makeConst(element::f16, ov::Shape({15360, 1}), MOCK_VALUE); + auto Multiply119 = makeOP({Subtract117, 
Constant118}, {{"auto_broadcast", "numpy"}}); + auto Convert120 = makeOP({Multiply119}, {{"destination_type", "f32"}}); + auto MatMul121 = + makeOP({Multiply112, Convert120}, {{"transpose_a", false}, {"transpose_b", true}}); + auto Reshape123 = makeOP({MatMul121, {0, 0, 3, 5120}}, {{"special_zero", true}}); + auto Unsqueeze125 = makeOP({Reshape123, 0}); + auto Squeeze127 = makeOP({Unsqueeze125, {0}}); + auto Transpose129 = makeOP({Squeeze127, {2, 0, 1, 3}}); + auto Gather130 = makeOP({Transpose129, 0, 0}, {{"batch_dims", 0}}); + auto Reshape132 = makeOP({Gather130, {0, 0, 40, 128}}, {{"special_zero", true}}); + auto Transpose134 = makeOP({Reshape132, {0, 2, 1, 3}}); + auto Transpose136 = makeOP({Transpose134, {0, 2, 1, 3}}); + auto Reshape138 = makeOP({Transpose136, {0, -1}}, {{"special_zero", true}}); + auto Gather140 = makeOP({Transpose129, 1, 0}, {{"batch_dims", 0}}); + auto Reshape142 = makeOP({Gather140, {0, 0, 40, 128}}, {{"special_zero", true}}); + auto Transpose144 = makeOP({Reshape142, {0, 2, 1, 3}}); + auto Transpose145 = makeOP({Transpose144, {0, 2, 1, 3}}); + auto Reshape147 = makeOP({Transpose145, {0, -1}}, {{"special_zero", true}}); + auto Gather149 = makeOP({Transpose129, 2, 0}, {{"batch_dims", 0}}); + auto Reshape151 = makeOP({Gather149, {0, 0, 40, 128}}, {{"special_zero", true}}); + auto Transpose153 = makeOP({Reshape151, {0, 2, 1, 3}}); + auto Transpose154 = makeOP({Transpose153, {0, 2, 1, 3}}); + auto Reshape156 = makeOP({Transpose154, {0, -1}}, {{"special_zero", true}}); + auto Constant159 = makeConst(element::f32, ov::Shape({40, 4096, 4096}), MOCK_VALUE); + auto Slice164 = makeOP({Constant159, {1, 1}, {2, 2}, {1, 1}, {1, 2}}); + auto Reshape166 = makeOP({Slice164, {-1}}, {{"special_zero", false}}); + + // PA cannot be instantiated uding makeOP hence creating constants for it manually + auto c1 = makeConst(element::f32, {}, {0.088388f}); + auto c2 = makeConst(element::i32, {}, {0}); + auto PagedAttentionExtension168 = + std::make_shared(ov::OutputVector{Reshape138, + Reshape147, + Reshape156, + key_cache_0, + value_cache_0, + past_lens, + subsequence_begins, + block_indices, + block_indices_begins, + c1, + c2, + Reshape166, + max_context_len}); + auto ShapeOf172 = makeOP({Transpose154}, {{"output_type", "i64"}}); + auto Gather175 = makeOP({ShapeOf172, -1, 0}, {{"batch_dims", 0}}); + auto Unsqueeze177 = makeOP({Gather175, 0}); + auto Concat178 = makeOP({{0l}, {1l}, {-1l}, Unsqueeze177}, {{"axis", 0}}); + auto Reshape179 = + makeOP({PagedAttentionExtension168->output(0), Concat178}, {{"special_zero", true}}); + auto Transpose180 = makeOP({Reshape179, {0, 2, 1, 3}}); + + auto result = std::make_shared(Transpose180); + model_ref = std::make_shared(ResultVector{result}, params); + + // checks are also disabled temporarily + comparator.disable(FunctionsComparator::PRECISIONS); + disable_result_friendly_names_check(); + disable_rt_info_check(); + } +} \ No newline at end of file diff --git a/src/core/include/openvino/core/type.hpp b/src/core/include/openvino/core/type.hpp index ab5c1ca0510b69..4877b9ce02b251 100644 --- a/src/core/include/openvino/core/type.hpp +++ b/src/core/include/openvino/core/type.hpp @@ -85,7 +85,7 @@ typename std::enable_if< bool>::value, bool>::type is_type(Value value) { - return value->get_type_info().is_castable(Type::get_type_info_static()); + return value && value->get_type_info().is_castable(Type::get_type_info_static()); } /// Casts a Value* to a Type* if it is of type Type, nullptr otherwise diff --git a/src/core/src/op/paged_attention.cpp 
b/src/core/src/op/paged_attention.cpp index a724e46499a57c..4d2cdc3e1fdac2 100644 --- a/src/core/src/op/paged_attention.cpp +++ b/src/core/src/op/paged_attention.cpp @@ -179,7 +179,7 @@ void PagedAttentionExtension::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, get_input_element_type(15).is_dynamic() || get_input_element_type(15) == element::f32 || get_input_element_type(15) == element::f16, - "Element type of `rotation_trig_lut` input should be f32, but it is ", + "Element type of `rotation_trig_lut` input should be f32 or f16, but it is ", get_input_element_type(15), "."); } diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 3b549ec91714e5..33e77d147557b0 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -533,18 +533,18 @@ std::shared_ptr ov::XmlDeserializer::parse_function(const pugi::xml_n auto node = create_node(inputs, p.xml, weights, p.params); id_to_node[layer_id] = node; - if (const auto& parameter_node = std::dynamic_pointer_cast(node)) { + if (const auto& parameter_node = ov::as_type_ptr(node)) { io_map.inputs.insert({layer_id, func_nodes.parameters.size()}); func_nodes.parameters.emplace_back(parameter_node); } - if (const auto& result_node = std::dynamic_pointer_cast(node)) { + if (const auto& result_node = ov::as_type_ptr(node)) { io_map.outputs.insert({layer_id, func_nodes.results.size()}); func_nodes.results.emplace_back(result_node); } - if (const auto& sink = std::dynamic_pointer_cast(node)) { - auto subgraph_op = std::dynamic_pointer_cast(node); + if (const auto& sink = ov::as_type_ptr(node)) { + auto subgraph_op = ov::as_type_ptr(node); if (subgraph_op) { for (const auto& body_model : subgraph_op->get_functions()) { if (body_model->get_sinks().size()) { @@ -557,7 +557,7 @@ std::shared_ptr ov::XmlDeserializer::parse_function(const pugi::xml_n } } - if (const auto& read_value = std::dynamic_pointer_cast(node)) { + if (const auto& read_value = ov::as_type_ptr(node)) { variable_id_to_read_value[read_value->get_variable_id()] = read_value; } @@ -569,7 +569,7 @@ std::shared_ptr ov::XmlDeserializer::parse_function(const pugi::xml_n func_nodes.parameters, pugixml::get_str_attr(root, "name", "")); for (const auto& sink : func_nodes.sinks) { - if (const auto& assign = std::dynamic_pointer_cast(sink)) { + if (const auto& assign = ov::as_type_ptr(sink)) { assign->add_control_dependency(variable_id_to_read_value.at(assign->get_variable_id())); } } @@ -902,7 +902,7 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector(ovNode)) { + if (auto constant = ov::as_type_ptr(ovNode)) { constant->alloc_buffer_on_visit_attributes(false); } ovNode->set_arguments(inputs); diff --git a/src/frontends/jax/src/node_context.cpp b/src/frontends/jax/src/node_context.cpp index 93fbac80807958..f6a965b258fff4 100644 --- a/src/frontends/jax/src/node_context.cpp +++ b/src/frontends/jax/src/node_context.cpp @@ -197,7 +197,7 @@ Any NodeContext::get_values_from_const_input(int index) const { index, " does not exist."); auto input_val = get_input(index); - if (auto input = std::dynamic_pointer_cast(input_val.get_node_shared_ptr())) { + if (auto input = ov::as_type_ptr(input_val.get_node_shared_ptr())) { const auto& attrs = input->get_attrs(); if (attrs.find("none_value") != attrs.end()) { return {}; diff --git a/src/frontends/onnx/frontend/src/core/null_node.cpp b/src/frontends/onnx/frontend/src/core/null_node.cpp index e595c4dd8f5c96..2f847f7d6d309f 100644 --- 
a/src/frontends/onnx/frontend/src/core/null_node.cpp +++ b/src/frontends/onnx/frontend/src/core/null_node.cpp @@ -19,7 +19,7 @@ std::shared_ptr NullNode::clone_with_new_inputs(const ov::OutputVector } // namespace ov bool ov::op::util::is_null(const ov::Node* node) { - return dynamic_cast(node) != nullptr; + return ov::as_type(node) != nullptr; } bool ov::op::util::is_null(const std::shared_ptr& node) { diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp index fd3bc1b655c039..3c71f1c8985187 100644 --- a/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp +++ b/src/frontends/onnx/frontend/src/op/com.microsoft/matmulnbits.cpp @@ -53,7 +53,7 @@ ov::OutputVector matmulnbits(const ov::frontend::onnx::Node& node) { CHECK_VALID_NODE(node, blob_size > 0, "Wrong blob size: ", blob_size); // in documentation: ...Input B is a 2D constant Matrix. CHECK_VALID_NODE(node, - dynamic_cast(b_quantized.get_node()) != nullptr, + ov::as_type(b_quantized.get_node()) != nullptr, "MatMulNBits limitation: accepting only a constant as a B input"); CHECK_VALID_NODE(node, b_quantized.get_partial_shape().rank() == 3, @@ -112,7 +112,7 @@ ov::OutputVector matmulnbits(const ov::frontend::onnx::Node& node) { } { - const auto b_const = std::dynamic_pointer_cast(b_quantized.get_node_shared_ptr()); + const auto b_const = ov::as_type_ptr(b_quantized.get_node_shared_ptr()); ov::Output casted_b; ov::Shape casted_b_shape; diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/qlinear_activation.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/qlinear_activation.cpp new file mode 100644 index 00000000000000..3702d80c79e0ff --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/com.microsoft/qlinear_activation.cpp @@ -0,0 +1,92 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "core/operator_set.hpp" +#include "exceptions.hpp" +#include "openvino/frontend/exception.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/maximum.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/sigmoid.hpp" +#include "openvino/op/subtract.hpp" +#include "utils/common.hpp" + +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace onnx { +namespace com_microsoft { +namespace opset_1 { + +template +ov::OutputVector qlinear_activation(const ov::frontend::onnx::Node& node, const ActivationType& activation_fn) { + common::default_op_checks(node, 5); + + const auto inputs = node.get_ov_inputs(); + auto input_tensor = inputs[0]; + auto input_scale = inputs[1]; + auto input_zero_point = + (inputs[2].get_shape().empty()) ? v0::Constant::create(input_tensor.get_element_type(), {}, {0}) : inputs[2]; + auto output_scale = inputs[3]; + auto output_zero_point = + (inputs.size() > 4) ? inputs[4] : v0::Constant::create(input_tensor.get_element_type(), {}, {0}); + + CHECK_VALID_NODE(node, + (input_tensor.get_element_type() == element::i8 || input_tensor.get_element_type() == element::u8), + "Input tensor must be either int8 or uint8. 
Got: ", + input_tensor.get_element_type()); + + auto input_subtracted = std::make_shared(input_tensor, input_zero_point); + auto input_dequantized = + std::make_shared(std::make_shared(input_subtracted, input_scale.get_element_type()), + input_scale); + + auto activation_result = activation_fn(input_dequantized); + + auto scaled_result_float = std::make_shared(activation_result, output_scale); + auto quantized_result = + std::make_shared(std::make_shared(scaled_result_float, input_tensor.get_element_type()), + output_zero_point); + + return ov::OutputVector{quantized_result}; +} + +ov::OutputVector qlinear_sigmoid(const ov::frontend::onnx::Node& node) { + // Original documentation: + // https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#commicrosoftqlinearsigmoid + // f(x) = quantize(Sigmoid(dequantize(x))) + + return qlinear_activation(node, [](const std::shared_ptr& input_dequantized) { + return std::make_shared(input_dequantized); + }); +} + +ov::OutputVector qlinear_leaky_relu(const ov::frontend::onnx::Node& node) { + // Original documentation: + // https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#commicrosoftqlinearleakyrelu + // f(x) = quantize(alpha * dequantize(x)) for x < 0, + // quantize(dequantize(x)) for x >= 0 + + return qlinear_activation(node, [&](const std::shared_ptr& input_dequantized) { + auto alpha = + v0::Constant::create(input_dequantized->get_element_type(), {}, {node.get_attribute_value("alpha")}); + return std::make_shared(input_dequantized, + std::make_shared(input_dequantized, alpha)); + }); +} + +namespace { +ONNX_OP("QLinearSigmoid", OPSET_SINCE(1), com_microsoft::opset_1::qlinear_sigmoid, MICROSOFT_DOMAIN); +} +ONNX_OP("QLinearLeakyRelu", OPSET_SINCE(1), com_microsoft::opset_1::qlinear_leaky_relu, MICROSOFT_DOMAIN); + +} // namespace opset_1 +} // namespace com_microsoft +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/range.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/range.cpp new file mode 100644 index 00000000000000..8740869ef8d415 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/com.microsoft/range.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/range.hpp" + +#include "core/operator_set.hpp" +#include "exceptions.hpp" +#include "openvino/op/constant.hpp" +#include "utils/common.hpp" + +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace onnx { +namespace com_microsoft { +namespace opset_1 { +ov::OutputVector range(const ov::frontend::onnx::Node& node) { + common::default_op_checks(node, 2); + auto nodes = node.get_ov_inputs(); + + auto start = nodes[0]; + auto limit = nodes[1]; + auto delta = + nodes.size() == 3 ? 
nodes[2] : ov::op::v0::Constant::create(start.get_element_type(), ov::Shape{}, {1}); + CHECK_VALID_NODE(node, + start.get_element_type() == limit.get_element_type(), + "start and limit must be of same type, got :", + start.get_element_type(), + limit.get_element_type()); + CHECK_VALID_NODE(node, + start.get_element_type() == delta.get_element_type(), + "start and delta must be of same type, got :", + start.get_element_type(), + delta.get_element_type()); + return {std::make_shared(start, limit, delta, start.get_element_type())}; +} +ONNX_OP("Range", OPSET_SINCE(1), com_microsoft::opset_1::range, MICROSOFT_DOMAIN); +} // namespace opset_1 +} // namespace com_microsoft +} // namespace onnx +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/utils/common.cpp b/src/frontends/onnx/frontend/src/utils/common.cpp index e15b0c0bcda4fd..041ada73f9c387 100644 --- a/src/frontends/onnx/frontend/src/utils/common.cpp +++ b/src/frontends/onnx/frontend/src/utils/common.cpp @@ -221,7 +221,7 @@ bool collect_translation_exceptions(const std::shared_ptr& partially_ }; for (const auto& node : partially_converted->get_ordered_ops()) { - if (const auto& fw_node = std::dynamic_pointer_cast(node)) { + if (const auto& fw_node = ov::as_type_ptr(node)) { const auto& attrs = fw_node->get_attrs(); auto node_name = attrs.get_opset_name() + "." + attrs.get_type_name(); if (unsupported_operations->count(node_name) > 0) { @@ -230,7 +230,7 @@ bool collect_translation_exceptions(const std::shared_ptr& partially_ print_unsupported(fw_node); unsupported_operations->insert(node_name); - } else if (const auto& fw_node = std::dynamic_pointer_cast(node)) { + } else if (const auto& fw_node = ov::as_type_ptr(node)) { const auto& attrs = fw_node->get_attrs(); if (fw_node->additional_error_message().empty()) { @@ -248,7 +248,7 @@ bool collect_translation_exceptions(const std::shared_ptr& partially_ failures->insert(node_fail); } - } else if (const auto& if_node = std::dynamic_pointer_cast(node)) { + } else if (const auto& if_node = ov::as_type_ptr(node)) { collect_translation_exceptions(if_node->get_then_body(), telemetry, output_stream, @@ -259,7 +259,7 @@ bool collect_translation_exceptions(const std::shared_ptr& partially_ output_stream, unsupported_operations, failures); - } else if (const auto& loop_node = std::dynamic_pointer_cast(node)) { + } else if (const auto& loop_node = ov::as_type_ptr(node)) { collect_translation_exceptions(loop_node->get_function(), telemetry, output_stream, diff --git a/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp b/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp index ebf34eb5863905..18edc12d61952a 100644 --- a/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp +++ b/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp @@ -31,7 +31,7 @@ void remove_dangling_parameters(std::shared_ptr& model) { std::all_of(parameter_users.begin(), parameter_users.end(), [](const std::shared_ptr& node) -> bool { - return std::dynamic_pointer_cast(node) != nullptr; + return ov::as_type_ptr(node) != nullptr; }); if (is_dangling_parameter) { model->remove_parameter(parameter); @@ -69,8 +69,8 @@ void convert_decoded_model(std::shared_ptr model) { "' attribute in decoded model. 
Model probably wasn't created by FrontEnd::decode function."); auto onnx_graph = it->second.as>(); for (const auto& node : model->get_ordered_ops()) { - if (auto raw_node = std::dynamic_pointer_cast(node)) { - if (auto subgraph_node = std::dynamic_pointer_cast(node)) { + if (auto raw_node = ov::as_type_ptr(node)) { + if (auto subgraph_node = ov::as_type_ptr(node)) { subgraph_node->infer_inputs_from_parent(); for (auto& model : subgraph_node->get_subgraph_models()) { convert_decoded_model(model); diff --git a/src/frontends/onnx/tests/conversion.cpp b/src/frontends/onnx/tests/conversion.cpp index c837fa394ce431..237712e60b2725 100644 --- a/src/frontends/onnx/tests/conversion.cpp +++ b/src/frontends/onnx/tests/conversion.cpp @@ -69,7 +69,7 @@ TEST(ONNXConversionExtensionTest, custom_op_with_custom_domain) { OV_ASSERT_NO_THROW(model = onnx::tests::convert_model("missing_op_domain.onnx", ext)); for (const auto& op : model->get_ops()) { - if (const auto& add = std::dynamic_pointer_cast(op)) { + if (const auto& add = ov::as_type_ptr(op)) { EXPECT_TRUE(add->get_rt_info().count("added_by_extension") == 1); return; } diff --git a/src/frontends/onnx/tests/convert_partially_tests.cpp b/src/frontends/onnx/tests/convert_partially_tests.cpp index 290bb4d7298a9c..0409d73cb860ee 100644 --- a/src/frontends/onnx/tests/convert_partially_tests.cpp +++ b/src/frontends/onnx/tests/convert_partially_tests.cpp @@ -19,7 +19,7 @@ namespace { std::shared_ptr get_framework_node_with_out_name(const std::shared_ptr& model, const std::string& out_name) { for (const auto& op : model->get_ops()) { - if (auto framework_node = std::dynamic_pointer_cast(op)) { + if (auto framework_node = ov::as_type_ptr(op)) { for (const auto& out : op->outputs()) { if (out.get_any_name() == out_name) { return framework_node; diff --git a/src/frontends/onnx/tests/models/com.microsoft/q_linear_leaky_relu.prototxt b/src/frontends/onnx/tests/models/com.microsoft/q_linear_leaky_relu.prototxt new file mode 100644 index 00000000000000..9ae6e1388eb988 --- /dev/null +++ b/src/frontends/onnx/tests/models/com.microsoft/q_linear_leaky_relu.prototxt @@ -0,0 +1,101 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +producer_version: "" +model_version: 0 +graph { + name: "test_qlinear_leakyrelu" + + node { + input: "X" + input: "X_scale" + input: "X_zero_point" + input: "Y_scale" + input: "Y_zero_point" + output: "Y" + op_type: "QLinearLeakyRelu" + attribute { + name: "alpha" + f: 0.1 + type: FLOAT + } + domain: "com.microsoft" + } + + input { + name: "X" + type { + tensor_type { + elem_type: 3 + shape { + dim { dim_value: 2 } + dim { dim_value: 3 } + } + } + } + } + + input { + name: "X_scale" + type { + tensor_type { + elem_type: 1 # float + shape { + dim { dim_value: 1 } + } + } + } + } + + input { + name: "X_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { dim_value: 1 } + } + } + } + } + + input { + name: "Y_scale" + type { + tensor_type { + elem_type: 1 + shape { + dim { dim_value: 1 } + } + } + } + } + + input { + name: "Y_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { dim_value: 1 } + } + } + } + } + + output { + name: "Y" + type { + tensor_type { + elem_type: 3 + shape { + dim { dim_value: 2 } + dim { dim_value: 3 } + } + } + } + } +} + +opset_import { + version: 1 +} diff --git a/src/frontends/onnx/tests/models/com.microsoft/q_linear_sigmoid.prototxt b/src/frontends/onnx/tests/models/com.microsoft/q_linear_sigmoid.prototxt new file mode 100644 index 00000000000000..17f7b7872bdc56 --- /dev/null 
+++ b/src/frontends/onnx/tests/models/com.microsoft/q_linear_sigmoid.prototxt @@ -0,0 +1,112 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +producer_version: "" +model_version: 0 +graph { + name: "test_qlinear_sigmoid" + + node { + input: "X" + input: "X_scale" + input: "X_zero_point" + input: "Y_scale" + input: "Y_zero_point" + output: "Y" + op_type: "QLinearSigmoid" + domain: "com.microsoft" + } + + input { + name: "X" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + } + } + } + } + + input { + name: "X_scale" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + } + } + } + } + + input { + name: "X_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + } + } + } +} + +input { + name: "Y_scale" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + } + } + } +} + + input { + name: "Y_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + } + } + } + } + + output { + name: "Y" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + } + } + } + } +} + +opset_import { + version: 1 +} diff --git a/src/frontends/onnx/tests/models/com.microsoft/range_with_delta.prototxt b/src/frontends/onnx/tests/models/com.microsoft/range_with_delta.prototxt new file mode 100644 index 00000000000000..af66b46bc85fa5 --- /dev/null +++ b/src/frontends/onnx/tests/models/com.microsoft/range_with_delta.prototxt @@ -0,0 +1,60 @@ +ir_version: 6 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "start" + input: "limit" + input: "delta" + output: "output" + op_type: "Range" + domain: "com.microsoft" + } + name: "test_range_float_type_with_delta" + input { + name: "start" + type { + tensor_type { + elem_type: 1 + shape { + } + } + } + } + input { + name: "limit" + type { + tensor_type { + elem_type: 1 + shape { + } + } + } + } + input { + name: "delta" + type { + tensor_type { + elem_type: 1 + shape { + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 10 + } + } + } + } + } +} +opset_import { + version: 1 + domain: "com.microsoft" +} diff --git a/src/frontends/onnx/tests/models/com.microsoft/range_without_delta.prototxt b/src/frontends/onnx/tests/models/com.microsoft/range_without_delta.prototxt new file mode 100644 index 00000000000000..b8ac7a98779955 --- /dev/null +++ b/src/frontends/onnx/tests/models/com.microsoft/range_without_delta.prototxt @@ -0,0 +1,49 @@ +ir_version: 6 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "start" + input: "limit" + output: "output" + op_type: "Range" + domain: "com.microsoft" + } + name: "test_range_float_type_without_delta" + input { + name: "start" + type { + tensor_type { + elem_type: 1 + shape { + } + } + } + } + input { + name: "limit" + type { + tensor_type { + elem_type: 1 + shape { + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 10 + } + } + } + } + } +} +opset_import { + version: 1 + domain: "com.microsoft" +} diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index 1b07401cce024d..47a336f1749417 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -1483,6 +1483,29 @@ OPENVINO_TEST(${BACKEND_NAME}, 
onnx_com_microsoft_simplified_layer_normalization test_case.run(); } +OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_range_with_delta) { + const auto model = convert_model("com.microsoft/range_with_delta.onnx"); + auto test_case = ov::test::TestCase(model, s_device); + + test_case.add_input({0.f}); + test_case.add_input({10.f}); + test_case.add_input({1.f}); + test_case.add_expected_output(Shape{10}, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f}); + + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_range_without_delta) { + const auto model = convert_model("com.microsoft/range_without_delta.onnx"); + auto test_case = ov::test::TestCase(model, s_device); + + test_case.add_input({0.f}); + test_case.add_input({10.f}); + test_case.add_expected_output(Shape{10}, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f}); + + test_case.run(); +} + OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_fusedmatmul_2x3) { const auto model = convert_model("com.microsoft/fusedmatmul_2D.onnx"); auto test_case = ov::test::TestCase(model, s_device); @@ -1554,6 +1577,52 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_matmul_integer_to_float) { test_case.run(); } +OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_qlinearsigmoid) { + const auto model = convert_model("com.microsoft/q_linear_sigmoid.onnx"); + auto test_case = ov::test::TestCase(model, s_device); + + const std::vector data_X{-50, -25, 0, 25, 50, 75}; + + const std::vector x_scale{0.1f}; + const std::vector x_zero_point{0}; + const std::vector y_scale{0.2f}; + const std::vector y_zero_point{0}; + + const std::vector expected_output{0, 0, 2, 4, 4, 4}; + + test_case.add_input(Shape{2, 3}, data_X); + test_case.add_input(Shape{1}, x_scale); + test_case.add_input(Shape{1}, x_zero_point); + test_case.add_input(Shape{1}, y_scale); + test_case.add_input(Shape{1}, y_zero_point); + + test_case.add_expected_output(Shape{2, 3}, expected_output); + test_case.run(); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_qlinearleakyrelu) { + const auto model = convert_model("com.microsoft/q_linear_leaky_relu.onnx"); + auto test_case = ov::test::TestCase(model, s_device); + + const std::vector data_X{-50, -25, 0, 25, 50, 75}; + + const std::vector x_scale{0.1f}; + const std::vector x_zero_point{0}; + const std::vector y_scale{0.2f}; + const std::vector y_zero_point{0}; + + const std::vector expected_output{-2, -1, 0, 12, 25, 37}; + + test_case.add_input(Shape{2, 3}, data_X); + test_case.add_input(Shape{1}, x_scale); + test_case.add_input(Shape{1}, x_zero_point); + test_case.add_input(Shape{1}, y_scale); + test_case.add_input(Shape{1}, y_zero_point); + + test_case.add_expected_output(Shape{2, 3}, expected_output); + test_case.run(); +} + OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_qlinear_add) { const auto model = convert_model("com.microsoft/q_linear_add.onnx"); auto test_case = ov::test::TestCase(model, s_device); diff --git a/src/frontends/onnx/tests/onnx_import_convpool.in.cpp b/src/frontends/onnx/tests/onnx_import_convpool.in.cpp index 3d34a40554752b..8080ff178bf79f 100644 --- a/src/frontends/onnx/tests/onnx_import_convpool.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_convpool.in.cpp @@ -334,7 +334,7 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_max_pool_empty_auto_pad) { const auto model = convert_model("max_pool_empty_auto_pad.onnx"); for (const auto& op : model->get_ops()) { - if (const auto max_pool = std::dynamic_pointer_cast(op)) { + if (const auto max_pool = ov::as_type_ptr(op)) { 
EXPECT_EQ(max_pool->get_auto_pad(), op::PadType::EXPLICIT); return; } diff --git a/src/frontends/onnx/tests/onnx_tensor_names.cpp b/src/frontends/onnx/tests/onnx_tensor_names.cpp index d66d6766f87dd0..933bb12cde1d76 100644 --- a/src/frontends/onnx/tests/onnx_tensor_names.cpp +++ b/src/frontends/onnx/tests/onnx_tensor_names.cpp @@ -29,7 +29,7 @@ bool matching_node_found_in_graph(const std::vector& ops, const std::unordered_set& output_names, int out_tensor_number = 0) { return std::any_of(std::begin(ops), std::end(ops), [&](const DerivedFromNode op) { - if (const std::shared_ptr casted = std::dynamic_pointer_cast(op)) { + if (const std::shared_ptr casted = ov::as_type_ptr(op)) { const auto& op_friendly_name = casted->get_friendly_name(); const auto& op_output_names = casted->get_output_tensor(out_tensor_number).get_names(); if (op_friendly_name == friendly_name && op_output_names == output_names) { @@ -44,11 +44,11 @@ template std::shared_ptr find_by_friendly_name(const std::vector& ops, const std::string& friendly_name) { const auto it = std::find_if(std::begin(ops), std::end(ops), [&friendly_name](const DerivedFromNode& op) { - return op->get_friendly_name() == friendly_name && std::dynamic_pointer_cast(op) != nullptr; + return op->get_friendly_name() == friendly_name && ov::as_type_ptr(op) != nullptr; }); if (it != std::end(ops)) { - return std::dynamic_pointer_cast(*it); + return ov::as_type_ptr(*it); } else { return nullptr; } diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp index 4081f59e132b0d..22d5547489e723 100644 --- a/src/frontends/paddle/src/frontend.cpp +++ b/src/frontends/paddle/src/frontend.cpp @@ -492,7 +492,7 @@ std::shared_ptr FrontEnd::convert(const InputModel::Ptr& model) const void FrontEnd::convert(const std::shared_ptr& partiallyConverted) const { for (const auto& node : partiallyConverted->get_ordered_ops()) { if (ov::is_type(node)) { - paddle::normalize_framework_node(std::dynamic_pointer_cast(node), m_op_translators); + paddle::normalize_framework_node(ov::as_type_ptr(node), m_op_translators); } } for (const auto& result : partiallyConverted->get_results()) { diff --git a/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp b/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp index 93c8d632292f3c..4ab7557c4be2cb 100644 --- a/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp +++ b/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp @@ -71,20 +71,20 @@ ov::frontend::paddle::pass::TransformFakeQuantize::TransformFakeQuantize() { // check round mode // Fallback to the PDPD FE if the round_mode is HALF_AWAY_FROM_ZERO. 
- const auto& round_node_cast = std::dynamic_pointer_cast(opsMap.at(round_label).get_node_shared_ptr()); + const auto& round_node_cast = ov::as_type_ptr(opsMap.at(round_label).get_node_shared_ptr()); if (!round_node_cast || round_node_cast->get_mode() != Round::RoundMode::HALF_TO_EVEN) { return false; } // check quantize_linear zero_point - auto zp_node_cast = std::dynamic_pointer_cast(opsMap.at(dq_zp_label).get_node_shared_ptr()); + auto zp_node_cast = ov::as_type_ptr(opsMap.at(dq_zp_label).get_node_shared_ptr()); float zp; if (!zp_node_cast || !ov::op::util::get_single_value(zp_node_cast, zp)) { return false; } // prepare levels - const auto& clamp_node_cast = std::dynamic_pointer_cast(opsMap.at(q_clamp_label).get_node_shared_ptr()); + const auto& clamp_node_cast = ov::as_type_ptr(opsMap.at(q_clamp_label).get_node_shared_ptr()); if (!clamp_node_cast) { return false; } @@ -93,7 +93,7 @@ ov::frontend::paddle::pass::TransformFakeQuantize::TransformFakeQuantize() { const auto levels = high_range - low_range + 1; // get the scale - const auto& scale_node_cast = std::dynamic_pointer_cast( + const auto& scale_node_cast = ov::as_type_ptr( opsMap.at(q_real_scale_label).get_node_shared_ptr()->get_input_node_shared_ptr(0)); float scale; if (!scale_node_cast || !ov::op::util::get_single_value(scale_node_cast, scale)) { diff --git a/src/frontends/paddle/src/internal/pass/transform_if.cpp b/src/frontends/paddle/src/internal/pass/transform_if.cpp index 3d96154e5213e1..cfda9f6cbd6c9f 100644 --- a/src/frontends/paddle/src/internal/pass/transform_if.cpp +++ b/src/frontends/paddle/src/internal/pass/transform_if.cpp @@ -23,8 +23,7 @@ ov::frontend::paddle::pass::TransformIf::TransformIf(std::vector(); matcher_pass_callback callback = [funcs](pattern::Matcher& m) -> bool { - const auto conditional_block = - std::dynamic_pointer_cast(m.get_match_root()); + const auto conditional_block = ov::as_type_ptr(m.get_match_root()); const auto mask_idx = conditional_block->get_input_size() - 1; const auto cond = conditional_block->get_input_node_shared_ptr(mask_idx); diff --git a/src/frontends/paddle/src/internal/pass/transform_while.cpp b/src/frontends/paddle/src/internal/pass/transform_while.cpp index cacc601ddc8214..702d9fd5c83cde 100644 --- a/src/frontends/paddle/src/internal/pass/transform_while.cpp +++ b/src/frontends/paddle/src/internal/pass/transform_while.cpp @@ -29,7 +29,7 @@ ov::frontend::paddle::pass::TransformWhile::TransformWhile(std::vector(); matcher_pass_callback callback = [functions](pattern::Matcher& m) -> bool { - const auto& while_node = std::dynamic_pointer_cast(m.get_match_root()); + const auto& while_node = ov::as_type_ptr(m.get_match_root()); if (!while_node) return false; const auto& inputs = while_node->input_values(); diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 69048d4798e788..04ba9a9c92c281 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -183,7 +183,7 @@ std::shared_ptr FrontEnd::convert(const ov::frontend::InputModel::Ptr& mo auto place = inputs[i]; if (place->get_names().size() != 0 && input_names.find(place->get_names().at(0)) != input_names.end()) { auto input = converted_model->input(place->get_names().at(0)); - auto param = std::dynamic_pointer_cast(input.get_node_shared_ptr()); + auto param = ov::as_type_ptr(input.get_node_shared_ptr()); FRONT_END_GENERAL_CHECK(param, "Input is not a Parameter."); update_parameter_info(param, place, converted_model); } else { @@ -205,7 +205,7 @@ 
std::shared_ptr FrontEnd::convert(const ov::frontend::InputModel::Ptr& mo update_parameter_info(parameters[idx], fplace, converted_model); } else { auto input = converted_model->input(fplace->get_names().at(0)); - auto param = std::dynamic_pointer_cast(input.get_node_shared_ptr()); + auto param = ov::as_type_ptr(input.get_node_shared_ptr()); FRONT_END_GENERAL_CHECK(param, "Input is not a Parameter."); update_parameter_info(param, fplace, converted_model); } diff --git a/src/frontends/pytorch/src/helper_ops/internal_op.hpp b/src/frontends/pytorch/src/helper_ops/internal_op.hpp index 54657a765f4338..f840ff856d4fd0 100644 --- a/src/frontends/pytorch/src/helper_ops/internal_op.hpp +++ b/src/frontends/pytorch/src/helper_ops/internal_op.hpp @@ -41,6 +41,9 @@ class InternalOpDecoder : public DummyDecoder { }; class InternalOperation : public PtFrameworkNode { +public: + OPENVINO_OP("InternalOperation", "util", PtFrameworkNode); + protected: InternalOperation(const std::string& op_type, const OutputVector& inputs, diff --git a/src/frontends/pytorch/src/helper_ops/packed_sequence.hpp b/src/frontends/pytorch/src/helper_ops/packed_sequence.hpp index d947ed735adcb2..9766346fbff563 100644 --- a/src/frontends/pytorch/src/helper_ops/packed_sequence.hpp +++ b/src/frontends/pytorch/src/helper_ops/packed_sequence.hpp @@ -13,7 +13,7 @@ namespace pytorch { class PackPadded : public InternalOperation { public: - OPENVINO_OP("PackPadded", "util", ov::op::util::FrameworkNode); + OPENVINO_OP("PackPadded", "util", InternalOperation); PackPadded(const Output& input, const Output& lengths) : InternalOperation("prim::PackPadded", {input, lengths}, 2, "This is PackedSequence pack operation.") { validate_and_infer_types(); @@ -27,7 +27,7 @@ class PackPadded : public InternalOperation { class PadPacked : public InternalOperation { public: - OPENVINO_OP("PadPacked", "util", ov::op::util::FrameworkNode); + OPENVINO_OP("PadPacked", "util", InternalOperation); PadPacked(const Output& input, const Output& lengths) : InternalOperation("prim::PadPacked", {input, lengths}, 2, "This is PackedSequence unpack operation.") { validate_and_infer_types(); diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp index bd3d7bc89c57f4..8edd353adb4599 100644 --- a/src/frontends/pytorch/src/node_context.cpp +++ b/src/frontends/pytorch/src/node_context.cpp @@ -111,7 +111,7 @@ Output NodeContext::get_input_from_visible_context(size_t index) const { FRONT_END_GENERAL_CHECK(index < get_input_size(), "Index ", index, " is lower then number of inputs."); auto input_tensor = get_input(static_cast(index)); auto input_node = input_tensor.get_node_shared_ptr(); - if (std::dynamic_pointer_cast(input_node)) { + if (ov::as_type_ptr(input_node)) { // We need to look into external context for inputs that would be feed into this parameter size_t tensor_idx = m_translate_session->decode_tensor_name(input_node->output(0)); if (m_ext_tensor_map.count(tensor_idx)) { @@ -298,7 +298,7 @@ template <> std::string NodeContext::const_input(size_t index) const { FRONT_END_GENERAL_CHECK(!input_is_none(index), "Input with index: ", index, " is none."); auto input_node = get_input_from_visible_context(index).get_node_shared_ptr(); - auto input = std::dynamic_pointer_cast(input_node); + auto input = ov::as_type_ptr(input_node); FRONT_END_GENERAL_CHECK(input, "Input node with index ", index, @@ -327,7 +327,7 @@ Any NodeContext::get_values_from_const_input(int index) const { if (input_is_none(index)) return {}; auto input_val = 
get_input_from_visible_context(index); - if (auto input = std::dynamic_pointer_cast(input_val.get_node_shared_ptr())) { + if (auto input = ov::as_type_ptr(input_val.get_node_shared_ptr())) { const auto& attrs = input->get_attrs(); if (attrs.find("none_value") != attrs.end()) { return {}; diff --git a/src/frontends/pytorch/src/op/arange.cpp b/src/frontends/pytorch/src/op/arange.cpp index 6725db7c90b267..e20d8171053975 100644 --- a/src/frontends/pytorch/src/op/arange.cpp +++ b/src/frontends/pytorch/src/op/arange.cpp @@ -64,8 +64,7 @@ OutputVector translate_arange(const NodeContext& context) { PYTORCH_OP_CONVERSION_CHECK(false, "Not expected number of inputs for ", context.get_op_type()); } if (dtype_port >= 0 && !context.input_is_none(dtype_port)) { - if (std::dynamic_pointer_cast( - context.get_input_from_visible_context(dtype_port).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(dtype_port).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(dtype_port)); dtype_applied = true; } else if (const auto& fw_node = diff --git a/src/frontends/pytorch/src/op/as_strided.cpp b/src/frontends/pytorch/src/op/as_strided.cpp index 00a64b09e7bedf..5079766b4a1af0 100644 --- a/src/frontends/pytorch/src/op/as_strided.cpp +++ b/src/frontends/pytorch/src/op/as_strided.cpp @@ -92,7 +92,7 @@ OutputVector translate_as_strided(const NodeContext& context) { std::deque> sizes; std::deque> strides; - if (std::dynamic_pointer_cast(context.get_input_from_visible_context(1).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(1).get_node_shared_ptr())) { auto input_vector = context.const_input>(1); std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) { auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val})); @@ -101,7 +101,7 @@ OutputVector translate_as_strided(const NodeContext& context) { } else { sizes = get_list_as_outputs(context.get_input(1)); } - if (std::dynamic_pointer_cast(context.get_input_from_visible_context(2).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(2).get_node_shared_ptr())) { auto input_vector = context.const_input>(2); std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) { auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val})); diff --git a/src/frontends/pytorch/src/op/as_tensor.cpp b/src/frontends/pytorch/src/op/as_tensor.cpp index fe447c544edb6b..9c3e4c026606a4 100644 --- a/src/frontends/pytorch/src/op/as_tensor.cpp +++ b/src/frontends/pytorch/src/op/as_tensor.cpp @@ -28,14 +28,14 @@ OutputVector translate_as_tensor(const NodeContext& context) { auto list_elems = get_list_as_outputs(data); if (!context.input_is_none(1)) { auto dtype_ext_node = context.get_input_from_visible_context(1).get_node_shared_ptr(); - auto dtype_fw_node = std::dynamic_pointer_cast(dtype_ext_node); + auto dtype_fw_node = ov::as_type_ptr(dtype_ext_node); if (dtype_fw_node && dtype_fw_node->get_op_type() == "prim::dtype") { auto type_input = dtype_fw_node->input_value(0); std::for_each(list_elems.begin(), list_elems.end(), [&](Output& n) { n = context.mark_node(std::make_shared(n, type_input)); }); } - if (auto dtype_const = std::dynamic_pointer_cast(dtype_ext_node)) { + if (auto dtype_const = ov::as_type_ptr(dtype_ext_node)) { auto pt_type = dtype_const->cast_vector()[0]; dtype = convert_dtype(pt_type); std::for_each(list_elems.begin(), list_elems.end(), 
[&](Output& n) { @@ -59,4 +59,4 @@ OutputVector translate_as_tensor(const NodeContext& context) { } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp index 9a6048d39044fc..5f620cc7b703c8 100644 --- a/src/frontends/pytorch/src/op/cat.cpp +++ b/src/frontends/pytorch/src/op/cat.cpp @@ -43,7 +43,7 @@ OutputVector translate_cat_common(const NodeContext& context, "::cat is located inside body while inputs are located outside of the body. " "This case is not supported."); if (list_elems.size() == 1 && - !std::dynamic_pointer_cast(context.get_input(0).get_node_shared_ptr()) && !is_fx) { + !ov::as_type_ptr(context.get_input(0).get_node_shared_ptr()) && !is_fx) { // Case when list was merged into tensor. // This case doesn't work with torchfx auto tensor = list_elems[0]; auto shape = context.mark_node(std::make_shared(tensor, element::i32)); diff --git a/src/frontends/pytorch/src/op/convnd.cpp b/src/frontends/pytorch/src/op/convnd.cpp index 78a78f23bc532d..ca3dcc77114ccb 100644 --- a/src/frontends/pytorch/src/op/convnd.cpp +++ b/src/frontends/pytorch/src/op/convnd.cpp @@ -53,7 +53,7 @@ OutputVector translate_convnd(const NodeContext& context) { if (!context.input_is_none(2)) { auto bias = context.get_input(2); auto bias_from_visible_context = context.get_input_from_visible_context(2); - if (std::dynamic_pointer_cast(bias_from_visible_context.get_node_shared_ptr())) { + if (ov::as_type_ptr(bias_from_visible_context.get_node_shared_ptr())) { bias = bias_from_visible_context; } auto bias_rank = bias.get_partial_shape().rank(); diff --git a/src/frontends/pytorch/src/op/linear.cpp b/src/frontends/pytorch/src/op/linear.cpp index 5472507d75cc2f..c6e345f70a9da7 100644 --- a/src/frontends/pytorch/src/op/linear.cpp +++ b/src/frontends/pytorch/src/op/linear.cpp @@ -60,7 +60,7 @@ uint32_t rearrange_awq_bits(uint32_t num) { } Output rearrange_constant(const Output& c, uint32_t groups) { - auto constant = std::dynamic_pointer_cast(c.get_node_shared_ptr()); + auto constant = ov::as_type_ptr(c.get_node_shared_ptr()); FRONT_END_OP_CONVERSION_CHECK(constant, "weight must be Constant."); auto src = constant->get_data_ptr(); auto initial_shape = constant->get_shape(); @@ -118,4 +118,4 @@ OutputVector translate_linear_awq(const NodeContext& context) { } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/linspace.cpp b/src/frontends/pytorch/src/op/linspace.cpp index 39fd2d5e7a8813..36319099a0d37a 100644 --- a/src/frontends/pytorch/src/op/linspace.cpp +++ b/src/frontends/pytorch/src/op/linspace.cpp @@ -37,7 +37,7 @@ OutputVector translate_linspace(const NodeContext& context) { auto dtype = element::f32; if (!context.input_is_none(3) && context.get_input_size() == 7) { // Case where dtype is provided directly in dtype input. 
- if (std::dynamic_pointer_cast(context.get_input_from_visible_context(3).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(3).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(3)); apply_dtype = true; } else if (const auto& fw_node = cast_fw_node(context.get_input(3).get_node_shared_ptr(), "prim::dtype")) { diff --git a/src/frontends/pytorch/src/op/list_construct.cpp b/src/frontends/pytorch/src/op/list_construct.cpp index 8916eeddb62121..15e87e1ca80e16 100644 --- a/src/frontends/pytorch/src/op/list_construct.cpp +++ b/src/frontends/pytorch/src/op/list_construct.cpp @@ -21,7 +21,7 @@ OutputVector translate_list_construct(const NodeContext& context) { ov::OutputVector consts; for (size_t i = 0; i < context.get_input_size(); i++) { auto input = context.get_input_from_visible_context(i); - auto c_node = std::dynamic_pointer_cast(input.get_node_shared_ptr()); + auto c_node = ov::as_type_ptr(input.get_node_shared_ptr()); PYTORCH_OP_CONVERSION_CHECK(c_node, "Translation for prim::ListConstruct support only constant inputs"); if (c_node->get_shape().size() == 0) { c_node = std::make_shared(c_node->get_element_type(), Shape{1}, c_node->get_data_ptr()); @@ -45,4 +45,4 @@ OutputVector translate_list_construct(const NodeContext& context) { } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/permute.cpp b/src/frontends/pytorch/src/op/permute.cpp new file mode 100644 index 00000000000000..46016ca8ca16a0 --- /dev/null +++ b/src/frontends/pytorch/src/op/permute.cpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/validation_util.hpp" +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/transpose.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +OutputVector translate_permute(const NodeContext& context) { + num_inputs_check(context, 2, 2); + auto data = context.get_input(0); + auto order = get_input_concat_if_list(context, 1); + auto rank = std::get<1>(get_shape_rank(context, data)); + auto rank_converted = context.mark_node(std::make_shared(rank, order)); + auto order_normalized = normalize_axis(context, order, rank_converted); + if (const auto order_const = ov::util::get_constant_from_source(order_normalized)) { + order_normalized = order_const; + } + return {context.mark_node(std::make_shared(data, order_normalized))}; +} + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/quantized_convnd.cpp b/src/frontends/pytorch/src/op/quantized_convnd.cpp index 523c2fe65b07ee..bbdbf0da4d7bba 100644 --- a/src/frontends/pytorch/src/op/quantized_convnd.cpp +++ b/src/frontends/pytorch/src/op/quantized_convnd.cpp @@ -21,8 +21,7 @@ using namespace ov::op; namespace { Output translate_quantized_convnd_base(const NodeContext& context) { auto input = context.get_input(0); - auto packed_params_node = - std::dynamic_pointer_cast(context.get_input(1).get_node_shared_ptr()); + auto packed_params_node = ov::as_type_ptr(context.get_input(1).get_node_shared_ptr()); PYTORCH_OP_CONVERSION_CHECK(packed_params_node, "Packed params input node type is required to be FrameworkNode."); const auto& attrs = packed_params_node->get_attrs(); PYTORCH_OP_CONVERSION_CHECK((attrs.find(PtFrameworkNode::op_type_key) != attrs.end()), 
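Editor's note (illustrative aside, not part of the patch): the recurring change in these hunks replaces std::dynamic_pointer_cast with ov::as_type_ptr, which resolves the target type through OpenVINO's type_info hierarchy plus a static cast instead of C++ RTTI, and returns nullptr on mismatch just like the old call. A minimal, self-contained sketch of the idiom, using a hypothetical helper name:

// Hypothetical helper (not from the patch): true when the node is a scalar Constant.
// ov::as_type_ptr yields nullptr for any other node type, so call sites keep their
// existing null checks when migrating from std::dynamic_pointer_cast.
#include <memory>
#include "openvino/core/shape.hpp"
#include "openvino/core/type.hpp"
#include "openvino/op/constant.hpp"

bool is_scalar_constant(const std::shared_ptr<ov::Node>& node) {
    if (auto constant = ov::as_type_ptr<ov::op::v0::Constant>(node)) {
        return ov::shape_size(constant->get_shape()) == 1;
    }
    return false;
}
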
@@ -36,13 +35,13 @@ Output translate_quantized_convnd_base(const NodeContext& context) { // Packed params: weight, bias, stride, padding, dilation, groups auto weight = packed_params[0].get_source_output(); auto bias = packed_params[1].get_source_output(); - auto strides = std::dynamic_pointer_cast(packed_params[2].get_source_output().get_node_shared_ptr()) + auto strides = ov::as_type_ptr(packed_params[2].get_source_output().get_node_shared_ptr()) ->cast_vector(); - auto pads = std::dynamic_pointer_cast(packed_params[3].get_source_output().get_node_shared_ptr()) + auto pads = ov::as_type_ptr(packed_params[3].get_source_output().get_node_shared_ptr()) ->cast_vector(); - auto dilations = std::dynamic_pointer_cast(packed_params[4].get_source_output().get_node_shared_ptr()) + auto dilations = ov::as_type_ptr(packed_params[4].get_source_output().get_node_shared_ptr()) ->cast_vector(); - int64_t groups = std::dynamic_pointer_cast(packed_params[5].get_source_output().get_node_shared_ptr()) + int64_t groups = ov::as_type_ptr(packed_params[5].get_source_output().get_node_shared_ptr()) ->cast_vector()[0]; auto pad_type = ov::op::PadType::EXPLICIT; diff --git a/src/frontends/pytorch/src/op/quantized_linear.cpp b/src/frontends/pytorch/src/op/quantized_linear.cpp index 609f33708f2c9c..3a3ac52d14b059 100644 --- a/src/frontends/pytorch/src/op/quantized_linear.cpp +++ b/src/frontends/pytorch/src/op/quantized_linear.cpp @@ -18,8 +18,7 @@ OutputVector translate_quantized_linear(const NodeContext& context) { // int Y_zero_point_i) -> Tensor Y" num_inputs_check(context, 4, 4); auto x = context.get_input(0); - auto packed_params_node = - std::dynamic_pointer_cast(context.get_input(1).get_node_shared_ptr()); + auto packed_params_node = ov::as_type_ptr(context.get_input(1).get_node_shared_ptr()); PYTORCH_OP_CONVERSION_CHECK(packed_params_node, "Packed params input node type is required to be FrameworkNode."); const auto& attrs = packed_params_node->get_attrs(); PYTORCH_OP_CONVERSION_CHECK((attrs.find(PtFrameworkNode::op_type_key) != attrs.end()), diff --git a/src/frontends/pytorch/src/op/rand.cpp b/src/frontends/pytorch/src/op/rand.cpp index 0779bf2bbcfaa8..cef77ee5811093 100644 --- a/src/frontends/pytorch/src/op/rand.cpp +++ b/src/frontends/pytorch/src/op/rand.cpp @@ -81,8 +81,7 @@ OutputVector translate_rand(const NodeContext& context) { dtype_id = 2; } if (!context.input_is_none(dtype_id)) { - if (std::dynamic_pointer_cast( - context.get_input_from_visible_context(dtype_id).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(dtype_id).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(dtype_id)); low = context.mark_node(std::make_shared(low, dtype)); high = context.mark_node(std::make_shared(high, dtype)); @@ -121,7 +120,7 @@ OutputVector translate_rand_like(const NodeContext& context) { bool dtype_applied = true; Output convert_like_out; if (!context.input_is_none(1)) { - if (std::dynamic_pointer_cast(context.get_input_from_visible_context(1).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(1).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(1)); low = context.mark_node(std::make_shared(low, dtype)); high = context.mark_node(std::make_shared(high, dtype)); @@ -177,8 +176,7 @@ OutputVector translate_randn(const NodeContext& context) { bool dtype_applied = true; Output convert_like_out; if (!context.input_is_none(dtype_id)) { - if (std::dynamic_pointer_cast( - 
context.get_input_from_visible_context(dtype_id).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(dtype_id).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(dtype_id)); } else if (const auto& fw_node = cast_fw_node(context.get_input(static_cast(dtype_id)).get_node_shared_ptr(), @@ -219,7 +217,7 @@ OutputVector translate_randn_like(const NodeContext& context) { bool dtype_applied = true; Output convert_like_out; if (!context.input_is_none(1)) { - if (std::dynamic_pointer_cast(context.get_input_from_visible_context(1).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(1).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(1)); } else if (const auto& fw_node = cast_fw_node(context.get_input(static_cast(1)).get_node_shared_ptr(), "prim::dtype")) { @@ -250,7 +248,7 @@ OutputVector translate_randint(const NodeContext& context) { bool dtype_applied = true; Output convert_like_out; if (!context.input_is_none(3)) { - if (std::dynamic_pointer_cast(context.get_input_from_visible_context(3).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(3).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(3)); } else if (const auto& fw_node = cast_fw_node(context.get_input(static_cast(3)).get_node_shared_ptr(), "prim::dtype")) { @@ -325,8 +323,7 @@ OutputVector translate_normal(const NodeContext& context) { Output convert_like_out; bool dtype_applied = true; if (!context.input_is_none(4)) { - if (std::dynamic_pointer_cast( - context.get_input_from_visible_context(3).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(3).get_node_shared_ptr())) { dtype = convert_dtype(context.const_input(4)); } else if (const auto& fw_node = cast_fw_node(context.get_input(3).get_node_shared_ptr(), "prim::dtype")) { convert_like_out = fw_node->input_value(0); diff --git a/src/frontends/pytorch/src/op/repeat_interleave.cpp b/src/frontends/pytorch/src/op/repeat_interleave.cpp index 79606417173a1d..b7bcb58ea0378a 100644 --- a/src/frontends/pytorch/src/op/repeat_interleave.cpp +++ b/src/frontends/pytorch/src/op/repeat_interleave.cpp @@ -48,7 +48,7 @@ OutputVector translate_repeat_interleave(const NodeContext& context) { std::shared_ptr result; auto repeats_ext_node = context.get_input_from_visible_context(1).get_node_shared_ptr(); - auto repeats_fw_node = std::dynamic_pointer_cast(repeats_ext_node); + auto repeats_fw_node = ov::as_type_ptr(repeats_ext_node); if (repeats_fw_node && repeats_fw_node->cast_vector().size() > 1) { // repeats is Constant with more then 1 element auto repeats = repeats_fw_node->cast_vector(); diff --git a/src/frontends/pytorch/src/op/to.cpp b/src/frontends/pytorch/src/op/to.cpp index 796dde380f861b..9d6525253d8c7a 100644 --- a/src/frontends/pytorch/src/op/to.cpp +++ b/src/frontends/pytorch/src/op/to.cpp @@ -23,7 +23,7 @@ OutputVector translate_to(const NodeContext& context) { // -> (Tensor(a)) dtype_idx = 1; auto node = context.get_input_from_visible_context(dtype_idx).get_node_shared_ptr(); - auto fw_node = std::dynamic_pointer_cast(node); + auto fw_node = ov::as_type_ptr(node); if (fw_node && fw_node->get_op_type() == "prim::device") { // Cast only to device without changing dtype. Return input node unchanged. return {context.get_input(0)}; @@ -66,12 +66,12 @@ OutputVector translate_to(const NodeContext& context) { // memory_format sets the desired memory format of returned Tensor. 
// memory format is ignored since it changes strides of a tensor. In openvino tensors are always contigious auto dtype_ext_node = context.get_input_from_visible_context(dtype_idx).get_node_shared_ptr(); - auto dtype_fw_node = std::dynamic_pointer_cast(dtype_ext_node); + auto dtype_fw_node = ov::as_type_ptr(dtype_ext_node); Output cast; if (dtype_fw_node && dtype_fw_node->get_op_type() == "prim::dtype") { auto type_input = dtype_fw_node->input_value(0); cast = context.mark_node(std::make_shared(context.get_input(0), type_input)); - } else if (const auto dtype_const = std::dynamic_pointer_cast(dtype_ext_node)) { + } else if (const auto dtype_const = ov::as_type_ptr(dtype_ext_node)) { auto pt_type = dtype_const->cast_vector()[0]; auto dtype = convert_dtype(pt_type); cast = context.mark_node(std::make_shared(context.get_input(0), dtype)); diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index b30e2a5ae6c5dd..b8ad83c1106510 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -173,6 +173,7 @@ OP_CONVERTER(translate_outer); OP_CONVERTER(translate_pack_padded_sequence); OP_CONVERTER(translate_pad); OP_CONVERTER(translate_pad_packed_sequence); +OP_CONVERTER(translate_permute); OP_CONVERTER(translate_pairwise_distance); OP_CONVERTER(translate_pixel_shuffle); OP_CONVERTER(translate_pixel_unshuffle); @@ -589,7 +590,7 @@ const std::unordered_map get_supported_ops_ts() { {"aten::outer", op::translate_outer}, {"aten::pad", op::translate_pad}, {"aten::pairwise_distance", op::translate_pairwise_distance}, - {"aten::permute", op::translate_1to1_match_2_inputs}, + {"aten::permute", op::translate_permute}, {"aten::pixel_shuffle", op::translate_pixel_shuffle}, {"aten::pixel_unshuffle", op::translate_pixel_unshuffle}, {"aten::prelu", op::translate_1to1_match_2_inputs}, @@ -920,7 +921,7 @@ const std::unordered_map get_supported_ops_fx() { {"aten.ones.default", op::translate_ones_fx}, {"aten.ones.names", op::translate_ones_fx}, {"aten.ones_like.default", op::translate_ones_like_fx}, - {"aten.permute.default", op::translate_1to1_match_2_inputs}, + {"aten.permute.default", op::translate_permute}, {"aten.permute_copy.default", op::translate_1to1_match_2_inputs}, {"aten.pow.Scalar", op::translate_pow}, {"aten.pow.Tensor_Scalar", op::translate_pow}, diff --git a/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp index a627db1c1187e3..692cac207034f0 100644 --- a/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp @@ -64,7 +64,7 @@ AtenCatToConcat::AtenCatToConcat() { } std::shared_ptr input_node = cat->get_input_node_shared_ptr(0); - if (auto loop = std::dynamic_pointer_cast(input_node)) { + if (auto loop = ov::as_type_ptr(input_node)) { // case when concatenation is done inside the Loop auto body = loop->get_function(); auto output_index = cat->input(0).get_source_output().get_index(); @@ -84,7 +84,7 @@ AtenCatToConcat::AtenCatToConcat() { "::cat unsupported case: aten::append wasn't found inside prim::Loop body."); return false; } - auto param = std::dynamic_pointer_cast(append->get_input_node_shared_ptr(0)); + auto param = ov::as_type_ptr(append->get_input_node_shared_ptr(0)); if (!param) { add_exception_to_fw_node( cat, diff --git a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp 
index bbaa1d768bc971..1c9aa1e9911077 100644 --- a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp @@ -37,7 +37,7 @@ AtenStackListConstructReplacer::AtenStackListConstructReplacer() { const auto& pattern_map = m.get_pattern_value_map(); const auto& input_node = pattern_map.at(list_construct).get_node_shared_ptr(); auto axis_node = pattern_map.at(axis).get_node_shared_ptr(); - auto axis_const = std::dynamic_pointer_cast(axis_node); + auto axis_const = ov::as_type_ptr(axis_node); auto axis = axis_const->cast_vector(); if (axis.size() != 1) { add_exception_to_fw_node(stack, "aten::stack has multiple axes, only one is supported."); diff --git a/src/frontends/pytorch/src/transforms/dict_resolver.cpp b/src/frontends/pytorch/src/transforms/dict_resolver.cpp index d301e6b5553b14..25d5d3ba603cc5 100644 --- a/src/frontends/pytorch/src/transforms/dict_resolver.cpp +++ b/src/frontends/pytorch/src/transforms/dict_resolver.cpp @@ -31,8 +31,8 @@ bool DictParameterResolver::run_on_model(const std::shared_ptr& model) { for (const auto inp : targets) { const auto getitem_node = cast_fw_node(inp.get_node()->shared_from_this(), "aten::__getitem__"); if (getitem_node) { - const auto index_node = std::dynamic_pointer_cast( - getitem_node->get_input_node_shared_ptr(1)); + const auto index_node = + ov::as_type_ptr(getitem_node->get_input_node_shared_ptr(1)); if (!index_node) { at_least_one_unused = true; continue; @@ -85,7 +85,7 @@ bool DictResultResolver::run_on_model(const std::shared_ptr& model) { for (size_t i = 0; i < inputs.size(); i += 2) { auto new_output = inputs.at(i + 1); const auto& name_node = inputs.at(i); - auto fw_node = std::dynamic_pointer_cast(name_node.get_node_shared_ptr()); + auto fw_node = ov::as_type_ptr(name_node.get_node_shared_ptr()); if (!fw_node) { add_exception_to_fw_node( dict_construct_node, diff --git a/src/frontends/pytorch/src/transforms/irfftn_complex_replacer.cpp b/src/frontends/pytorch/src/transforms/irfftn_complex_replacer.cpp index 99aa253a9478e6..cb80987e4511ae 100644 --- a/src/frontends/pytorch/src/transforms/irfftn_complex_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/irfftn_complex_replacer.cpp @@ -116,8 +116,8 @@ IRFFTNComplexReplacer::IRFFTNComplexReplacer() { // Handle norm parameter indicating normalization mode to use. Defaults to "backward". 
std::string norm; - if (const auto& fw_node_mode = std::dynamic_pointer_cast( - irfftn_op->input_value(3).get_node_shared_ptr())) { + if (const auto& fw_node_mode = + ov::as_type_ptr(irfftn_op->input_value(3).get_node_shared_ptr())) { const auto& attrs = fw_node_mode->get_attrs(); if (attrs.find("string_value") != attrs.end()) { norm = attrs.at("string_value"); diff --git a/src/frontends/pytorch/src/transforms/listconstruct_replacer.cpp b/src/frontends/pytorch/src/transforms/listconstruct_replacer.cpp index 9be1bbeeb16fad..c594a54ca80669 100644 --- a/src/frontends/pytorch/src/transforms/listconstruct_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/listconstruct_replacer.cpp @@ -6,8 +6,6 @@ #include "openvino/core/rt_info.hpp" #include "openvino/core/validation_util.hpp" -#include "openvino/op/abs.hpp" -#include "openvino/op/adaptive_avg_pool.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" @@ -17,11 +15,9 @@ #include "openvino/op/multiply.hpp" #include "openvino/op/random_uniform.hpp" #include "openvino/op/reshape.hpp" -#include "openvino/op/roll.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" #include "openvino/op/tile.hpp" -#include "openvino/op/transpose.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/op/variadic_split.hpp" #include "openvino/pass/pattern/matcher.hpp" @@ -47,8 +43,6 @@ ListConstructReplacer::ListConstructReplacer() { const auto& select_op = pattern::wrap_type({pattern::any_input(), pattern::any_input(), list}); // replace list construct for aten::repeat(tensor, prim::ListConstruct(shapes))) const auto& tile_op = pattern::wrap_type({pattern::any_input(), list}); - // replace aten::permute(tensor, prim::ListConstruct) - const auto& transpose_op = pattern::wrap_type({pattern::any_input(), list}); // aten::split_with_sizes case const auto& vsplit_op = pattern::wrap_type({pattern::any_input(), pattern::any_input(), list}); // aten::upsample... case inside the body when body was removed @@ -58,15 +52,8 @@ ListConstructReplacer::ListConstructReplacer() { pattern::wrap_type({pattern::any_input(), interpolate_mul_op, pattern::any_input()}); // aten::randint case const auto& rand_op = pattern::wrap_type({list, pattern::any_input(), pattern::any_input()}); - const auto& lc_pattern = std::make_shared(OutputVector{broadcast_op, - shape_of_op, - equal_op, - select_op, - tile_op, - transpose_op, - vsplit_op, - interpolate_op, - rand_op}); + const auto& lc_pattern = std::make_shared( + OutputVector{broadcast_op, shape_of_op, equal_op, select_op, tile_op, vsplit_op, interpolate_op, rand_op}); ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto& pattern_map = m.get_pattern_value_map(); diff --git a/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp b/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp index 35d5df54fe4d71..2240eec03c1251 100644 --- a/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp @@ -305,7 +305,7 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { copy_runtime_info_and_name(list_unpack, rg.get(), {input_node, meshgrid_input_node}); replace_node(list_unpack, outputs); return true; - } else if (auto shape_of = std::dynamic_pointer_cast(input_node)) { + } else if (auto shape_of = ov::as_type_ptr(input_node)) { // case aten::size as input // Number of ListUnpack outputs should be equal to rank of input shape. 
auto axis_0 = v0::Constant::create(element::i32, Shape{}, {0}); @@ -321,7 +321,7 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { replace_node(list_unpack, res); return true; - } else if (auto slice = std::dynamic_pointer_cast(input_node)) { + } else if (auto slice = ov::as_type_ptr(input_node)) { // case aten::slice as input // Number of ListUnpack outputs should be equal to rank of input shape. auto axis_0 = v0::Constant::create(element::i32, Shape{}, {0}); diff --git a/src/frontends/pytorch/src/transforms/remove_packing_ops.cpp b/src/frontends/pytorch/src/transforms/remove_packing_ops.cpp index 125ddc29f16824..463e6ec7eb8895 100644 --- a/src/frontends/pytorch/src/transforms/remove_packing_ops.cpp +++ b/src/frontends/pytorch/src/transforms/remove_packing_ops.cpp @@ -116,7 +116,7 @@ RemovePackingOps::RemovePackingOps() { if (!pack_node) return false; if (as_type_ptr(pack_node)) - pack_node = std::dynamic_pointer_cast(pack_node->input_value(0).get_node_shared_ptr()); + pack_node = ov::as_type_ptr(pack_node->input_value(0).get_node_shared_ptr()); if (!pack_node) return false; diff --git a/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp b/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp index f5b8f8a5f021a4..b90e3121930c71 100644 --- a/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp @@ -90,8 +90,8 @@ RFFTNComplexReplacer::RFFTNComplexReplacer() { // Handle norm parameter indicating normalization mode to use. Defaults to "backward". std::string norm; - if (const auto& fw_node_mode = std::dynamic_pointer_cast( - rfftn_op->input_value(3).get_node_shared_ptr())) { + if (const auto& fw_node_mode = + ov::as_type_ptr(rfftn_op->input_value(3).get_node_shared_ptr())) { const auto& attrs = fw_node_mode->get_attrs(); if (attrs.find("string_value") != attrs.end()) { norm = attrs.at("string_value"); diff --git a/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.hpp b/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.hpp index 4157364046cf61..40b35954e58eb7 100644 --- a/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.hpp +++ b/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.hpp @@ -19,6 +19,7 @@ namespace pass { */ class SoftmaxReshapeElimination : public ov::pass::MatcherPass { public: + OPENVINO_MATCHER_PASS_RTTI("ov::frontend::pytorch::pass::SoftmaxReshapeElimination"); SoftmaxReshapeElimination(); }; diff --git a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp index f9a741dedd3996..d378b2e9a27821 100644 --- a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp @@ -34,8 +34,7 @@ StringEqualityReplacer::StringEqualityReplacer() { ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto& pattern_map = m.get_pattern_value_map(); - auto lhs_node = - std::dynamic_pointer_cast(pattern_map.at(framework_node_lhs).get_node_shared_ptr()); + auto lhs_node = ov::as_type_ptr(pattern_map.at(framework_node_lhs).get_node_shared_ptr()); if (!lhs_node) { return false; } @@ -45,8 +44,7 @@ StringEqualityReplacer::StringEqualityReplacer() { } std::string lhs = lhs_attrs.at("string_value"); - auto rhs_node = - std::dynamic_pointer_cast(pattern_map.at(framework_node_rhs).get_node_shared_ptr()); + auto rhs_node = 
ov::as_type_ptr(pattern_map.at(framework_node_rhs).get_node_shared_ptr()); if (!rhs_node) { return false; } @@ -57,14 +55,14 @@ StringEqualityReplacer::StringEqualityReplacer() { std::string rhs = rhs_attrs.at("string_value"); auto equal_node = pattern_map.at(equal_op).get_node_shared_ptr(); - if (auto equal = std::dynamic_pointer_cast(equal_node)) { + if (auto equal = ov::as_type_ptr(equal_node)) { auto const_result = v0::Constant::create(element::boolean, Shape{}, {lhs == rhs}); copy_runtime_info_and_name(equal_node, {const_result}); replace_node(equal_node, const_result); return true; }; auto not_equal_node = pattern_map.at(not_equal_op).get_node_shared_ptr(); - if (auto equal = std::dynamic_pointer_cast(not_equal_node)) { + if (auto equal = ov::as_type_ptr(not_equal_node)) { auto const_result = v0::Constant::create(element::boolean, Shape{}, {lhs != rhs}); copy_runtime_info_and_name(equal_node, {const_result}); replace_node(equal_node, const_result); diff --git a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp index 730da8f4c20a69..a9101cbd080890 100644 --- a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp @@ -68,18 +68,14 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { } const auto& pattern_map = m.get_pattern_value_map(); auto unsqueeze_1_node = pattern_map.at(unsqueeze_1).get_node_shared_ptr(); - auto unsqueeze_1_in0_const = - std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(0)); - auto unsqueeze_1_in1_const = - std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(1)); + auto unsqueeze_1_in0_const = ov::as_type_ptr(unsqueeze_1_node->get_input_node_shared_ptr(0)); + auto unsqueeze_1_in1_const = ov::as_type_ptr(unsqueeze_1_node->get_input_node_shared_ptr(1)); auto abs_node = pattern_map.at(abs).get_node_shared_ptr(); - auto abs_in_const = std::dynamic_pointer_cast(abs_node->get_input_node_shared_ptr(0)); + auto abs_in_const = ov::as_type_ptr(abs_node->get_input_node_shared_ptr(0)); auto broadcast_node = pattern_map.at(broadcast).get_node_shared_ptr(); auto unsqueeze_2_node = pattern_map.at(unsqueeze_2).get_node_shared_ptr(); - auto unsqueeze_2_in0_const = - std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(0)); - auto unsqueeze_2_in1_const = - std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(1)); + auto unsqueeze_2_in0_const = ov::as_type_ptr(unsqueeze_2_node->get_input_node_shared_ptr(0)); + auto unsqueeze_2_in1_const = ov::as_type_ptr(unsqueeze_2_node->get_input_node_shared_ptr(1)); OutputVector outputs_1(unsqueeze_1_node->get_output_size()); OutputVector unsqueeze_1_inputs(2); @@ -110,9 +106,9 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { return false; } const int32_t* rs_in0 = - std::dynamic_pointer_cast(outputs_3[0].get_node_shared_ptr())->get_data_ptr(); + ov::as_type_ptr(outputs_3[0].get_node_shared_ptr())->get_data_ptr(); const int32_t* rs_in1 = - std::dynamic_pointer_cast(outputs_4[0].get_node_shared_ptr())->get_data_ptr(); + ov::as_type_ptr(outputs_4[0].get_node_shared_ptr())->get_data_ptr(); auto shifted_const = std::make_shared(element::i32, outputs_3[0].get_shape()); auto dst = const_cast(reinterpret_cast(shifted_const->get_data_ptr())); if (!dst) @@ -156,8 +152,7 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { } else { auto convert_3_node = 
pattern_map.at(convert_3).get_node_shared_ptr(); auto convert_4_node = pattern_map.at(convert_4).get_node_shared_ptr(); - auto convert_4_in_const = - std::dynamic_pointer_cast(convert_4_node->get_input_node_shared_ptr(0)); + auto convert_4_in_const = ov::as_type_ptr(convert_4_node->get_input_node_shared_ptr(0)); auto add_node = pattern_map.at(add).get_node_shared_ptr(); OutputVector outputs_5(convert_3_node->get_output_size()); if (!convert_3_node->constant_fold(outputs_5, shifted_const->outputs())) { @@ -177,7 +172,7 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { } auto convert_2_node = pattern_map.at(convert_2).get_node_shared_ptr(); - auto convert_2_in_const = std::dynamic_pointer_cast(convert_2_node->get_input_node_shared_ptr(0)); + auto convert_2_in_const = ov::as_type_ptr(convert_2_node->get_input_node_shared_ptr(0)); OutputVector outputs_8(convert_2_node->get_output_size()); if (!convert_2_node->constant_fold(outputs_8, convert_2_in_const->outputs())) { @@ -187,9 +182,9 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { OutputVector outputs_9(bitwise_and->get_output_size()); const int8_t* and_in0 = - std::dynamic_pointer_cast(outputs_7[0].get_node_shared_ptr())->get_data_ptr(); + ov::as_type_ptr(outputs_7[0].get_node_shared_ptr())->get_data_ptr(); const int8_t* and_in1 = - std::dynamic_pointer_cast(outputs_8[0].get_node_shared_ptr())->get_data_ptr(); + ov::as_type_ptr(outputs_8[0].get_node_shared_ptr())->get_data_ptr(); auto masked_const = std::make_shared(element::i8, outputs_7[0].get_shape()); auto masked_dst = const_cast(reinterpret_cast(masked_const->get_data_ptr())); if (!masked_dst) @@ -258,15 +253,14 @@ GPTQMultPatternReplacer::GPTQMultPatternReplacer() { auto reshape3_node = pattern_map.at(reshape_3).get_node_shared_ptr(); // auto mult_node = pattern_map.at(mult).get_node_shared_ptr(); - auto add_input0_const = std::dynamic_pointer_cast(convert_1_node->get_input_node_shared_ptr(0)); + auto add_input0_const = ov::as_type_ptr(convert_1_node->get_input_node_shared_ptr(0)); if (add_input0_const->get_element_type() != element::u4) { return false; } auto add_in0_ptr = add_input0_const->get_data_ptr(); uint32_t add_val = 0; if (convert_2_node) { - auto convert_2_input_const = - std::dynamic_pointer_cast(convert_2_node->get_input_node_shared_ptr(0)); + auto convert_2_input_const = ov::as_type_ptr(convert_2_node->get_input_node_shared_ptr(0)); auto add_in1_ptr = convert_2_input_const->get_data_ptr(); if (!add_in1_ptr) return false; @@ -289,7 +283,7 @@ GPTQMultPatternReplacer::GPTQMultPatternReplacer() { } const auto& static_shape_2 = reshape2_node->get_shape(); - auto reshape2_in0_const = std::dynamic_pointer_cast(convert_4_node->get_input_node_shared_ptr(0)); + auto reshape2_in0_const = ov::as_type_ptr(convert_4_node->get_input_node_shared_ptr(0)); auto sub_replace_const = std::make_shared(reshape2_in0_const->get_element_type(), static_shape_2, reshape2_in0_const->get_data_ptr()); @@ -297,7 +291,7 @@ GPTQMultPatternReplacer::GPTQMultPatternReplacer() { auto new_sub_node = std::make_shared(new_convert_node, add_replace_const); const auto& static_shape_3 = reshape3_node->get_shape(); - auto reshape3_in0_const = std::dynamic_pointer_cast(reshape3_node->get_input_node_shared_ptr(0)); + auto reshape3_in0_const = ov::as_type_ptr(reshape3_node->get_input_node_shared_ptr(0)); auto mult_scale_const = std::make_shared(reshape3_in0_const->get_element_type(), static_shape_3, reshape3_in0_const->get_data_ptr()); diff --git 
a/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp b/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp index dd9bef56384051..e6993dfb55077b 100644 --- a/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp @@ -52,7 +52,7 @@ bool TupleUnpackInBodyReplacer::run_on_model(const std::shared_ptr& model if (if_op) { for (size_t i = 1; i < if_op->get_input_size(); i++) { auto input = if_op->input_value(i); - auto tuple_construct = std::dynamic_pointer_cast( + auto tuple_construct = ov::as_type_ptr( cast_fw_node(input.get_node_shared_ptr(), "prim::TupleConstruct")); if (!tuple_construct) { continue; diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp index 3e6e3eac0b5571..4c8bdb04a78039 100644 --- a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -49,8 +49,7 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) { std::make_shared(m_reshape2, "ov::frontend::pytorch::pass::U4BlockRepack"), [=](Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); - auto constant = - std::dynamic_pointer_cast(pattern_to_output[m_constant].get_node_shared_ptr()); + auto constant = ov::as_type_ptr(pattern_to_output[m_constant].get_node_shared_ptr()); if (!constant) return false; auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr(); @@ -89,13 +88,13 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) { if (reshape_targets.size() != 1) return false; auto convert = reshape_targets.begin()->get_node()->shared_from_this(); - if (!std::dynamic_pointer_cast(convert)) + if (!ov::as_type_ptr(convert)) return false; auto convert_targets = convert->output(0).get_target_inputs(); if (convert_targets.size() != 1) return false; auto subtract = convert_targets.begin()->get_node()->shared_from_this(); - if (!std::dynamic_pointer_cast(subtract)) + if (!ov::as_type_ptr(subtract)) return false; pattern_root = subtract; copy_from.push_back(std::move(convert)); @@ -145,8 +144,7 @@ U4ConvertReshape::U4ConvertReshape() { std::make_shared(m_reshape, "ov::frontend::pytorch::pass::U4ConvertReshape"), [=](Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); - auto u4_const = - std::dynamic_pointer_cast(pattern_to_output[m_constant].get_node_shared_ptr()); + auto u4_const = ov::as_type_ptr(pattern_to_output[m_constant].get_node_shared_ptr()); if (!u4_const) return false; @@ -158,15 +156,15 @@ U4ConvertReshape::U4ConvertReshape() { std::shared_ptr new_const; if (pattern_to_output.count(m_constant_8)) { - auto constant_8 = std::dynamic_pointer_cast( - pattern_to_output[m_constant_8].get_node_shared_ptr()); + auto constant_8 = + ov::as_type_ptr(pattern_to_output[m_constant_8].get_node_shared_ptr()); if (ov::shape_size(constant_8->get_output_shape(0)) != 1 || constant_8->get_output_element_type(0).is_real() || constant_8->cast_vector()[0] != 8) return false; if (pattern_to_output.count(m_constant_1)) { - auto constant_1 = std::dynamic_pointer_cast( - pattern_to_output[m_constant_1].get_node_shared_ptr()); + auto constant_1 = + ov::as_type_ptr(pattern_to_output[m_constant_1].get_node_shared_ptr()); if (ov::shape_size(constant_1->get_output_shape(0)) != 1 || constant_1->get_output_element_type(0).is_real() || constant_1->cast_vector()[0] != 1) return false; diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index 
171445b959eeaa..da0b5c5cd24d61 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -200,8 +200,7 @@ element::Type convert_dtype(int64_t pt_type) { }; Output apply_dtype(const NodeContext& context, size_t dtype_port, const Output& input_tensor) { - if (std::dynamic_pointer_cast( - context.get_input_from_visible_context(dtype_port).get_node_shared_ptr())) { + if (ov::as_type_ptr(context.get_input_from_visible_context(dtype_port).get_node_shared_ptr())) { auto dtype = convert_dtype(context.const_input(dtype_port)); return context.mark_node(std::make_shared(input_tensor, dtype)); } else if (const auto& fw_node = @@ -373,7 +372,7 @@ OutputVector make_framework_node(const NodeContext& context, const std::string& } std::shared_ptr cast_fw_node(std::shared_ptr node, const std::string& type) { - auto fw_node = std::dynamic_pointer_cast(node); + auto fw_node = ov::as_type_ptr(node); if (!fw_node) { return nullptr; } @@ -386,7 +385,7 @@ std::shared_ptr cast_fw_node(std::shared_ptr std::shared_ptr cast_fw_node(std::shared_ptr node, std::initializer_list types) { - auto fw_node = std::dynamic_pointer_cast(node); + auto fw_node = ov::as_type_ptr(node); if (!fw_node) { return nullptr; } @@ -410,7 +409,7 @@ std::shared_ptr make_list_construct(const ov::OutputVector& inputs) { } bool is_none_node(const Output& node) { - if (const auto& fw_node_inp = std::dynamic_pointer_cast(node.get_node_shared_ptr())) { + if (const auto& fw_node_inp = ov::as_type_ptr(node.get_node_shared_ptr())) { const auto& attrs = fw_node_inp->get_attrs(); if (attrs.find("none_value") != attrs.end()) { return true; @@ -523,7 +522,7 @@ Output get_input_as_i32(const NodeContext& context, size_t idx) { Output get_input_concat_if_list(const NodeContext& context, size_t idx) { auto x = context.get_input(static_cast(idx)); if (context.get_input_type(idx).is() && - std::dynamic_pointer_cast(x.get_node_shared_ptr())) { + ov::as_type_ptr(x.get_node_shared_ptr())) { auto elems = get_list_as_outputs(x, true); if (elems.size() == 0) // Can we figure real type for empty list? 
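Editor's note: the recurring change in these frontend hunks replaces C++ RTTI downcasts with OpenVINO's type-info based cast. A minimal, self-contained sketch of the pattern (illustrative only; the helper name try_as_constant is made up):

#include <memory>

#include "openvino/core/type.hpp"
#include "openvino/op/constant.hpp"

// Returns the node as a Constant when its type_info matches, otherwise nullptr;
// unlike std::dynamic_pointer_cast, this relies on OpenVINO's own type system.
std::shared_ptr<ov::op::v0::Constant> try_as_constant(const std::shared_ptr<ov::Node>& node) {
    // before: std::dynamic_pointer_cast<ov::op::v0::Constant>(node)
    return ov::as_type_ptr<ov::op::v0::Constant>(node);
}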
@@ -562,7 +561,7 @@ std::deque> get_list_as_outputs(const Output& start, bool uns auto current_output = start; auto zero = v0::Constant::create(element::i32, Shape{}, {0}); while (const auto& input_fw_node = - std::dynamic_pointer_cast(current_output.get_node_shared_ptr())) { + ov::as_type_ptr(current_output.get_node_shared_ptr())) { const auto& attrs = input_fw_node->get_attrs(); if (attrs.find(PtFrameworkNode::op_type_key) == attrs.end()) { break; diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index 1e47573f7e9c36..76d5dc36a77af5 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -212,7 +212,7 @@ Output quantize_fx(const NodeContext& context, } std::shared_ptr cast_quantized_fw_node(std::shared_ptr node) { - auto quant_node = std::dynamic_pointer_cast(node); + auto quant_node = ov::as_type_ptr(node); if (!quant_node) { return nullptr; } @@ -232,7 +232,7 @@ std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64 auto bitwise_and_candidate = list_elems[0].get_node_shared_ptr(); std::shared_ptr bitwise_and = cast_fw_node(bitwise_and_candidate, "aten::bitwise_and"); if (!bitwise_and) { - bitwise_and = std::dynamic_pointer_cast(bitwise_and_candidate); + bitwise_and = ov::as_type_ptr(bitwise_and_candidate); if (!bitwise_and) return nullptr; } @@ -242,9 +242,8 @@ std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64 if (!bitwise_shift) return nullptr; - auto weights_u8 = std::dynamic_pointer_cast(bitwise_and->get_input_node_shared_ptr(0)); - auto weights_u8_bitwise_shift = - std::dynamic_pointer_cast(bitwise_shift->get_input_node_shared_ptr(0)); + auto weights_u8 = ov::as_type_ptr(bitwise_and->get_input_node_shared_ptr(0)); + auto weights_u8_bitwise_shift = ov::as_type_ptr(bitwise_shift->get_input_node_shared_ptr(0)); if (weights_u8->get_data_ptr() != weights_u8_bitwise_shift->get_data_ptr()) return nullptr; diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index 88a8e58c3cfd31..bd6e03cfdab5d9 100644 --- a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -1314,7 +1314,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | TensorListSetItem | YES | | | TensorListSplit | NO | | | TensorListStack | YES | | -| TensorScatterAdd | NO | | +| TensorScatterAdd | YES | | | TensorScatterMax | NO | | | TensorScatterMin | NO | | | TensorScatterSub | NO | | diff --git a/src/frontends/tensorflow/src/frontend.cpp b/src/frontends/tensorflow/src/frontend.cpp index b276d2b5a4ed93..af609088679e14 100644 --- a/src/frontends/tensorflow/src/frontend.cpp +++ b/src/frontends/tensorflow/src/frontend.cpp @@ -66,7 +66,7 @@ void get_unsupported_operations_and_failures(const std::shared_ptr& model std::set& unsupported_operations, std::unordered_map& failures) { for (const auto& node : model->get_ordered_ops()) { - if (const auto& internal_op = std::dynamic_pointer_cast(node)) { + if (const auto& internal_op = ov::as_type_ptr(node)) { // handle internal operations separately // which can have elaborated reason of unconverted operation // like Const of string type @@ -546,7 +546,7 @@ std::shared_ptr FrontEnd::decode(const ov::frontend::InputModel::Ptr& void FrontEnd::convert(const std::shared_ptr& partiallyConverted) const { for (const auto& node : partiallyConverted->get_ordered_ops()) { if (ov::is_type(node)) { - 
translate_framework_node(std::dynamic_pointer_cast(node), m_op_translators); + translate_framework_node(ov::as_type_ptr(node), m_op_translators); } } for (const auto& result : partiallyConverted->get_results()) { diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 08fd85000b49e1..bcdfbb37927701 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -414,6 +414,7 @@ const std::map get_supported_ops() { {"TensorListReserve", CreatorFunction(translate_tensor_list_reserve_op)}, {"TensorListResize", CreatorFunction(translate_tensor_list_resize_op)}, {"TensorListConcatV2", CreatorFunction(translate_tensor_list_concat_v2_op)}, + {"TensorScatterAdd", CreatorFunction(translate_tensor_scatter_add_op)}, {"TensorScatterUpdate", CreatorFunction(translate_tensor_scatter_update_op)}, {"Tile", CreatorFunction(translate_tile_op)}, {"ToBool", CreatorFunction(translate_tobool_op)}, diff --git a/src/frontends/tensorflow/src/transformations/uninitialized_variable_resolve.cpp b/src/frontends/tensorflow/src/transformations/uninitialized_variable_resolve.cpp index 6c268f77b910ce..9cedaff7bf06a5 100644 --- a/src/frontends/tensorflow/src/transformations/uninitialized_variable_resolve.cpp +++ b/src/frontends/tensorflow/src/transformations/uninitialized_variable_resolve.cpp @@ -20,7 +20,7 @@ ov::frontend::tensorflow::pass::UninitializedVariableResolver::UninitializedVari matcher_pass_callback callback = [=](pattern::Matcher& m) { NodeRegistry rg; - auto unitialized_hash_table = dynamic_pointer_cast(m.get_match_root()); + auto unitialized_hash_table = ov::as_type_ptr(m.get_match_root()); if (!unitialized_hash_table) { return false; } diff --git a/src/frontends/tensorflow/src/translate_session.cpp b/src/frontends/tensorflow/src/translate_session.cpp index 3004d4953d5c53..efac0d96e9880b 100644 --- a/src/frontends/tensorflow/src/translate_session.cpp +++ b/src/frontends/tensorflow/src/translate_session.cpp @@ -529,7 +529,7 @@ void TranslateSession::translate_graph(const ov::frontend::InputModel::Ptr& inpu const auto& input_outputs_vector = ov_tensors_map->at(producer_name); if (input_outputs_vector.size() <= producer_port_idx) { auto producer_node = input_outputs_vector[0].port.get_node_shared_ptr(); - if (std::dynamic_pointer_cast(producer_node)) { + if (ov::as_type_ptr(producer_node)) { // FrameworkNode node does not know in advance how many output ports will be used // so we can increase number of outputs by demand producer_node->set_output_type(producer_port_idx, element::dynamic, PartialShape::dynamic()); @@ -583,13 +583,13 @@ void TranslateSession::translate_graph(const ov::frontend::InputModel::Ptr& inpu // We can't add all Sink operations to sinks vector, as there can be a FrameworkNode, // which we might need to remove from graph if (ov::as_type_ptr(node)) { - sinks.push_back(std::dynamic_pointer_cast(node)); + sinks.push_back(ov::as_type_ptr(node)); } else { - auto multi_subgraph = std::dynamic_pointer_cast(node); + auto multi_subgraph = ov::as_type_ptr(node); if (multi_subgraph) { for (const auto& body_model : multi_subgraph->get_functions()) { if (body_model->get_sinks().size()) { - sinks.push_back(std::dynamic_pointer_cast(multi_subgraph)); + sinks.push_back(ov::as_type_ptr(multi_subgraph)); break; } } @@ -738,7 +738,7 @@ void TranslateSession::translate_graph(const ov::frontend::InputModel::Ptr& inpu for (size_t output_ind = 0; output_ind < node_output_vector.second.size(); ++output_ind) { auto output = 
node_output_vector.second[output_ind].port; if (output.get_target_inputs().empty() && - !std::dynamic_pointer_cast(output.get_node_shared_ptr())) { + !ov::as_type_ptr(output.get_node_shared_ptr())) { auto model_output_name = output.get_node_shared_ptr()->get_friendly_name() + ":" + std::to_string(output_ind); auto result_node = std::make_shared(output); diff --git a/src/frontends/tensorflow/tests/convert_tricky_models.cpp b/src/frontends/tensorflow/tests/convert_tricky_models.cpp index ffb5ece8a2d2f9..d50e187d2bcfc8 100644 --- a/src/frontends/tensorflow/tests/convert_tricky_models.cpp +++ b/src/frontends/tensorflow/tests/convert_tricky_models.cpp @@ -110,7 +110,7 @@ TEST(FrontEndConvertTrickyModels, simple_wide_and_deep) { int num_emb_segment_sum = 0; for (auto& node : model->get_ordered_ops()) { - if (std::dynamic_pointer_cast(node)) { + if (ov::as_type_ptr(node)) { ++num_emb_segment_sum; } } diff --git a/src/frontends/tensorflow/tests/convert_unsupported.cpp b/src/frontends/tensorflow/tests/convert_unsupported.cpp index f37c1419854139..bc06ce0dc418a2 100644 --- a/src/frontends/tensorflow/tests/convert_unsupported.cpp +++ b/src/frontends/tensorflow/tests/convert_unsupported.cpp @@ -112,7 +112,7 @@ TEST(FrontEndConvertModelTest, test_unsupported_op) { ASSERT_THROW(frontEnd->convert(model), OpConversionFailure); for (auto& node : model->get_ordered_ops()) { - if (node->get_friendly_name() == "relu_0" && dynamic_pointer_cast(node)) { + if (node->get_friendly_name() == "relu_0" && ov::as_type_ptr(node)) { model->replace_node(node, make_shared(node->input(0).get_source_output())); } } diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 92d2c6d6fb4a9d..c4a36e30119795 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -177,6 +177,7 @@ OP_CONVERTER(translate_tensor_list_set_item_op); OP_CONVERTER(translate_tensor_list_stack_op); OP_CONVERTER(translate_tensor_list_resize_op); OP_CONVERTER(translate_tensor_list_concat_v2_op); +OP_CONVERTER(translate_tensor_scatter_add_op); OP_CONVERTER(translate_tensor_scatter_update_op); OP_CONVERTER(translate_tile_op); OP_CONVERTER(translate_tobool_op); diff --git a/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp index 1e3fa977db8a89..a06832898e3e39 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp @@ -23,7 +23,7 @@ ov::frontend::tensorflow::pass::TensorArrayV3Replacer::TensorArrayV3Replacer() { matcher_pass_callback callback = [=](pattern::Matcher& m) { NodeRegistry rg; - auto tensor_array_v3 = dynamic_pointer_cast(m.get_match_root()); + auto tensor_array_v3 = ov::as_type_ptr(m.get_match_root()); if (!tensor_array_v3) { return false; } diff --git a/src/frontends/tensorflow_common/src/helper_transforms/tensor_list_ops_resolver.cpp b/src/frontends/tensorflow_common/src/helper_transforms/tensor_list_ops_resolver.cpp index 81eade74e15233..1baff1008f1dae 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/tensor_list_ops_resolver.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/tensor_list_ops_resolver.cpp @@ -105,7 +105,7 @@ void update_parameter_to_slice_input(const std::shared_ptr& node, std::vector& 
update_param_ids) { // select only TensorListGetItem that accepts a tensor list from Parameter node // value of Parameter node is unchanged from one iteration to another one in Loop - auto tensor_list_get_item = std::dynamic_pointer_cast(node); + auto tensor_list_get_item = ov::as_type_ptr(node); if (!tensor_list_get_item) { return; } @@ -142,7 +142,7 @@ void update_result_to_concat_output(const std::shared_ptr& node, std::vector& remove_param_ids) { // select only TensorListSetItem that accepts a tensor list from Parameter node // output of TensorListSetItem goes to Result that is connected with the tensor list by a back edge - auto tensor_list_set_item = std::dynamic_pointer_cast(node); + auto tensor_list_set_item = ov::as_type_ptr(node); if (!tensor_list_set_item) { return; } @@ -202,7 +202,7 @@ ov::frontend::tensorflow::pass::TensorListReplacer::TensorListReplacer() { matcher_pass_callback callback = [=](pattern::Matcher& m) { NodeRegistry rg; - auto tensor_list = std::dynamic_pointer_cast(m.get_match_root()); + auto tensor_list = ov::as_type_ptr(m.get_match_root()); if (!tensor_list) { return false; } @@ -255,7 +255,7 @@ ov::frontend::tensorflow::pass::TensorListSetItemReplacer::TensorListSetItemRepl matcher_pass_callback callback = [=](pattern::Matcher& m) { NodeRegistry rg; - auto tensor_list_set_item = std::dynamic_pointer_cast(m.get_match_root()); + auto tensor_list_set_item = ov::as_type_ptr(m.get_match_root()); if (!tensor_list_set_item) { return false; } @@ -309,7 +309,7 @@ ov::frontend::tensorflow::pass::TensorListPushBackReplacer::TensorListPushBackRe matcher_pass_callback callback = [=](pattern::Matcher& m) { NodeRegistry rg; - auto tensor_list_push_back = std::dynamic_pointer_cast(m.get_match_root()); + auto tensor_list_push_back = ov::as_type_ptr(m.get_match_root()); if (!tensor_list_push_back) { return false; } @@ -353,7 +353,7 @@ ov::frontend::tensorflow::pass::TensorListGetItemReplacer::TensorListGetItemRepl matcher_pass_callback callback = [=](pattern::Matcher& m) { NodeRegistry rg; - auto tensor_list_get_item = std::dynamic_pointer_cast(m.get_match_root()); + auto tensor_list_get_item = ov::as_type_ptr(m.get_match_root()); if (!tensor_list_get_item) { return false; } @@ -491,8 +491,7 @@ ov::frontend::tensorflow::pass::TensorListInLoopOptimization::TensorListInLoopOp std::vector update_result_last_iter_ids; for (uint64_t result_idx = 0; result_idx < body_results.size(); ++result_idx) { const auto& result = body_results[result_idx]; - auto tensor_list_set_item = - std::dynamic_pointer_cast(result->get_input_node_shared_ptr(0)); + auto tensor_list_set_item = ov::as_type_ptr(result->get_input_node_shared_ptr(0)); if (!tensor_list_set_item) { continue; } @@ -529,8 +528,7 @@ ov::frontend::tensorflow::pass::TensorListInLoopOptimization::TensorListInLoopOp update_result_last_iter_ids.end()); for (auto update_result_idx : all_update_result_ids) { const auto& body_result = body_results[update_result_idx]; - auto tensor_list_set_item = - std::dynamic_pointer_cast(body_result->get_input_node_shared_ptr(0)); + auto tensor_list_set_item = ov::as_type_ptr(body_result->get_input_node_shared_ptr(0)); FRONT_END_GENERAL_CHECK(tensor_list_set_item, "[TensorFlow Frontend] internal error: tensor_list_set_item is nullptr in " "TensorListInLoopOptimization"); @@ -559,7 +557,7 @@ ov::frontend::tensorflow::pass::TensorListInLoopOptimization::TensorListInLoopOp "TensorListGetItem operation in TensorListInLoopOptimization"); auto target_input = *(body_param->get_output_target_inputs(0).begin()); 
auto tensor_list_get_item = - std::dynamic_pointer_cast(target_input.get_node()->shared_from_this()); + ov::as_type_ptr(target_input.get_node()->shared_from_this()); FRONT_END_GENERAL_CHECK(tensor_list_get_item, "[TensorFlow Frontend] internal error: tensor list must have only consumer " "TensorListGetItem operation in TensorListInLoopOptimization"); diff --git a/src/frontends/tensorflow_common/src/op/tensor_scatter_add.cpp b/src/frontends/tensorflow_common/src/op/tensor_scatter_add.cpp new file mode 100644 index 00000000000000..382f6f1914e334 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/tensor_scatter_add.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/scatter_nd_update.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_tensor_scatter_add_op(const NodeContext& node) { + default_op_checks(node, 3, {"TensorScatterAdd"}); + auto data = node.get_input(0); + auto indices = node.get_input(1); + auto updates = node.get_input(2); + auto reduction = v15::ScatterNDUpdate::Reduction::SUM; + auto scatter_add_op = make_shared(data, indices, updates, reduction); + set_node_name(node.get_name(), scatter_add_op); + + return {scatter_add_op}; +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/tensorflow_lite/src/frontend.cpp b/src/frontends/tensorflow_lite/src/frontend.cpp index bbf55a0f6f12f6..30cceeeb10b7dc 100644 --- a/src/frontends/tensorflow_lite/src/frontend.cpp +++ b/src/frontends/tensorflow_lite/src/frontend.cpp @@ -140,8 +140,7 @@ std::shared_ptr FrontEnd::convert(const ov::frontend::InputModel::Ptr void FrontEnd::convert(const std::shared_ptr& partiallyConverted) const { for (const auto& node : partiallyConverted->get_ordered_ops()) { if (ov::is_type(node)) { - translate_framework_node(std::dynamic_pointer_cast(node), - m_op_translators); + translate_framework_node(ov::as_type_ptr(node), m_op_translators); } } for (const auto& result : partiallyConverted->get_results()) { diff --git a/src/frontends/tests/frontend/shared/include/op_extension.hpp b/src/frontends/tests/frontend/shared/include/op_extension.hpp index 563a80739ef7e1..791911052eafb0 100644 --- a/src/frontends/tests/frontend/shared/include/op_extension.hpp +++ b/src/frontends/tests/frontend/shared/include/op_extension.hpp @@ -18,7 +18,7 @@ struct OpExtensionFEParam { class Relu : public ov::op::Op { public: - OPENVINO_OP("Relu"); + OPENVINO_OP("Relu", "frontend_test"); Relu() = default; Relu(const ov::Output& arg) : ov::op::Op({arg}) { diff --git a/src/frontends/tests/frontend/shared/src/cut_specific_model.cpp b/src/frontends/tests/frontend/shared/src/cut_specific_model.cpp index 53e634e1b1087e..019c05d0108907 100644 --- a/src/frontends/tests/frontend/shared/src/cut_specific_model.cpp +++ b/src/frontends/tests/frontend/shared/src/cut_specific_model.cpp @@ -240,7 +240,7 @@ TEST_P(FrontEndCutModelTest, testSetTensorValue) { return node->get_friendly_name().find(const_name) != std::string::npos; }); ASSERT_TRUE(const_node_it != ops.end()) << "Name shall exist:" << const_name; - auto data = std::dynamic_pointer_cast(*const_node_it)->get_vector(); + auto data = ov::as_type_ptr(*const_node_it)->get_vector(); EXPECT_EQ(data.size(), m_param.m_tensorValue.size()) << "Data size must be equal to expected size"; 
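Editor's note: a standalone sketch of the mapping used by the new TensorScatterAdd translator above: the TensorFlow op is expressed through opset-15 ScatterNDUpdate with a SUM reduction (illustrative only; make_tensor_scatter_add is a made-up helper name):

#include <memory>

#include "openvino/op/scatter_nd_update.hpp"

// data[indices[i]] += updates[i] for every index tuple, matching TF TensorScatterAdd semantics.
std::shared_ptr<ov::Node> make_tensor_scatter_add(const ov::Output<ov::Node>& data,
                                                  const ov::Output<ov::Node>& indices,
                                                  const ov::Output<ov::Node>& updates) {
    using Reduction = ov::op::v15::ScatterNDUpdate::Reduction;
    return std::make_shared<ov::op::v15::ScatterNDUpdate>(data, indices, updates, Reduction::SUM);
}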
EXPECT_TRUE(std::equal(data.begin(), data.end(), m_param.m_tensorValue.begin())) << "Data must be equal"; } diff --git a/src/frontends/tests/frontend/shared/test_builtin_extensions/builtin_extensions.cpp b/src/frontends/tests/frontend/shared/test_builtin_extensions/builtin_extensions.cpp index 792ef552907000..09fecb89ad9e90 100644 --- a/src/frontends/tests/frontend/shared/test_builtin_extensions/builtin_extensions.cpp +++ b/src/frontends/tests/frontend/shared/test_builtin_extensions/builtin_extensions.cpp @@ -94,7 +94,7 @@ std::map Relu6ToReluTranslatorPaddle(const ov::fr class CustomElu : public ov::op::Op { public: - OPENVINO_OP("CustomElu"); + OPENVINO_OP("CustomElu", "frontend_test"); CustomElu() = default; CustomElu(const ov::Output& input, float alpha, float beta) : m_alpha{alpha}, m_beta{beta} { @@ -159,7 +159,7 @@ class CustomElu : public ov::op::Op { # include "openvino/op/relu.hpp" class ReluCustom : public ov::op::v0::Relu { public: - OPENVINO_OP("ReluCustom"); + OPENVINO_OP("ReluCustom", "frontend_test"); OPENVINO_FRAMEWORK_MAP(pytorch, "aten::relu"); }; # define PT_EXT \ diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index 5ff1131185f1e5..6a6f02799cae46 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -516,8 +516,7 @@ void parse_cache_info_linux(const std::vector> system_i if ((system_info_table[n][2].size() > 0) || (system_info_table[n][1].size() > 0)) { info_index = system_info_table[n][2].size() > 0 ? 2 : 1; if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { - std::string::size_type pos = 0; - std::string::size_type endpos = 0; + std::string::size_type pos = 0, endpos = 0, endpos1 = 0; std::string sub_str; int core_1; @@ -531,7 +530,10 @@ void parse_cache_info_linux(const std::vector> system_i } while (1) { - if ((endpos = system_info_table[n][info_index].find('-', pos)) != std::string::npos) { + endpos = system_info_table[n][info_index].find('-', pos); + endpos1 = system_info_table[n][info_index].find(',', pos); + + if (endpos < endpos1) { sub_str = system_info_table[n][info_index].substr(pos, endpos - pos); core_1 = std::stoi(sub_str); sub_str = system_info_table[n][info_index].substr(endpos + 1); @@ -549,8 +551,8 @@ void parse_cache_info_linux(const std::vector> system_i return; }; } - } else if (pos != std::string::npos) { - sub_str = system_info_table[n][info_index].substr(pos); + } else { + sub_str = system_info_table[n][info_index].substr(pos, endpos1 - pos); core_1 = std::stoi(sub_str); _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets; _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = @@ -559,11 +561,10 @@ void parse_cache_info_linux(const std::vector> system_i if (_processors == 0) { return; }; - endpos = pos; } - if ((pos = system_info_table[n][2].find(',', endpos)) != std::string::npos) { - pos++; + if (endpos1 != std::string::npos) { + pos = endpos1 + 1; } else { break; } diff --git a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp index 8eece188e4cfe6..beedd33a342d74 100644 --- a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp +++ b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp @@ -1275,6 +1275,41 @@ LinuxCpuMapTestCase cache_1sockets_14cores_hyperthreading_1 = { }, {}, }; +LinuxCpuMapTestCase cache_1sockets_14cores = { + 9, + 1, + 1, + 9, + {{9, 1, 8, 0, 0, 0}}, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {6, 0, 0, 1, 
EFFICIENT_CORE_PROC, 1, -1}, + {7, 0, 0, 2, EFFICIENT_CORE_PROC, 1, -1}, + {8, 0, 0, 3, EFFICIENT_CORE_PROC, 1, -1}, + {9, 0, 0, 4, EFFICIENT_CORE_PROC, 1, -1}, + {10, 0, 0, 5, EFFICIENT_CORE_PROC, 2, -1}, + {11, 0, 0, 6, EFFICIENT_CORE_PROC, 2, -1}, + {12, 0, 0, 7, EFFICIENT_CORE_PROC, 2, -1}, + {13, 0, 0, 8, EFFICIENT_CORE_PROC, 2, -1}, + }, + { + {"0", "0", "0,6-13"}, + {"", "", ""}, + {"", "", ""}, + {"", "", ""}, + {"", "", ""}, + {"", "", ""}, + {"6", "6-9", "0,6-13"}, + {"7", "6-9", "0,6-13"}, + {"8", "6-9", "0,6-13"}, + {"9", "6-9", "0,6-13"}, + {"10", "10-13", "0,6-13"}, + {"11", "10-13", "0,6-13"}, + {"12", "10-13", "0,6-13"}, + {"13", "10-13", "0,6-13"}, + }, + {}, +}; LinuxCpuMapTestCase cache_1sockets_10cores_hyperthreading = { 12, 1, @@ -1475,6 +1510,7 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, cache_1sockets_16cores_hyperthreading, cache_1sockets_14cores_hyperthreading, cache_1sockets_14cores_hyperthreading_1, + cache_1sockets_14cores, cache_1sockets_10cores_hyperthreading, cache_1sockets_8cores_hyperthreading, cache_1sockets_8cores_hyperthreading_1, diff --git a/src/plugins/auto/src/schedule.cpp b/src/plugins/auto/src/schedule.cpp index f52a8327992e26..abfd460d42118b 100644 --- a/src/plugins/auto/src/schedule.cpp +++ b/src/plugins/auto/src/schedule.cpp @@ -85,8 +85,11 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel OPENVINO_THROW("Every device used with AUTO should support query optimal_number_of_infer_requests property from compiled model ", iie.what()); } - const auto num_requests = (m_context->m_device_priorities.end() == it_numrequests || - it_numrequests->num_requests_per_devices == -1) ? optimal_num : it_numrequests->num_requests_per_devices; + auto num_requests = + (m_context->m_device_priorities.end() == it_numrequests || it_numrequests->num_requests_per_devices == -1) + ? optimal_num + : it_numrequests->num_requests_per_devices; + num_requests = (num_requests == 1) ? 
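Editor's note: a standalone sketch (not the plugin code) of the parsing idea from the lin_system_conf.cpp hunk above. A shared-CPU string such as "0,6-13" mixes single ids and ranges, so the positions of the next '-' and ',' have to be compared before cutting the token:

#include <string>
#include <vector>

// parse_cpu_list("0,6-13") -> {0, 6, 7, 8, 9, 10, 11, 12, 13}
std::vector<int> parse_cpu_list(const std::string& s) {
    std::vector<int> cpus;
    std::string::size_type pos = 0;
    while (pos < s.size()) {
        const auto dash = s.find('-', pos);
        const auto comma = s.find(',', pos);
        if (dash < comma) {
            // A range such as "6-13" comes before the next comma.
            const int first = std::stoi(s.substr(pos, dash - pos));
            const int last = std::stoi(s.substr(dash + 1, comma - dash - 1));
            for (int c = first; c <= last; ++c)
                cpus.push_back(c);
        } else {
            // A single id such as "0".
            cpus.push_back(std::stoi(s.substr(pos, comma - pos)));
        }
        if (comma == std::string::npos)
            break;
        pos = comma + 1;
    }
    return cpus;
}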
2 : num_requests; auto& worker_requests = m_worker_requests[device]; auto& idle_worker_requests = m_idle_worker_requests[device]; worker_requests.resize(num_requests); diff --git a/src/plugins/auto/tests/unit/dynamic_output_test.cpp b/src/plugins/auto/tests/unit/dynamic_output_test.cpp index d7c1fecbdb905f..8c9d4820b892cb 100644 --- a/src/plugins/auto/tests/unit/dynamic_output_test.cpp +++ b/src/plugins/auto/tests/unit/dynamic_output_test.cpp @@ -7,7 +7,6 @@ #include "include/auto_unit_test.hpp" #include "openvino/runtime/threading/immediate_executor.hpp" - using DynamicOutputConfigParams = std::tuple; @@ -21,14 +20,18 @@ class DynamicOutputInferenceTest : public tests::AutoTest, public ::testing::Tes mockExecutor.reset(); mockExecutorActual.reset(); mockInferrequest.reset(); + mockInferrequest_2.reset(); mockInferrequestActual.reset(); + mockInferrequestActual_2.reset(); } protected: ov::Any priorityList; ov::Any targetList; std::shared_ptr mockInferrequest; + std::shared_ptr mockInferrequest_2; std::shared_ptr mockInferrequestActual; + std::shared_ptr mockInferrequestActual_2; std::shared_ptr mockExecutor; std::shared_ptr mockExecutorActual; }; @@ -53,10 +56,22 @@ void DynamicOutputInferenceTest::SetUp() { mockExecutorActual = std::make_shared(); mockInferrequest = std::make_shared(inferReqInternal, mockExecutor, nullptr, false); + // will be at least 2 infer requests for mocked CPU/GPU + auto inferReqInternal_2 = std::make_shared(mockIExeNet); + mockInferrequest_2 = + std::make_shared(inferReqInternal_2, mockExecutor, nullptr, false); + + auto inferReqInternalActual_2 = std::make_shared(mockIExeNetActual); + mockInferrequestActual = std::make_shared(inferReqInternalActual, mockExecutorActual, nullptr, false); + mockInferrequestActual_2 = std::make_shared(inferReqInternalActual_2, + mockExecutorActual, + nullptr, + false); + std::tie(priorityList, targetList) = GetParam(); auto targets = targetList.as>(); ON_CALL(*core, get_available_devices()).WillByDefault(Return(targets)); @@ -103,11 +118,12 @@ TEST_P(DynamicOutputInferenceTest, CanInferWithOutputChangedFromDynamicOnAutoToS auto tensor = inferReqInternal->get_tensor(it); tensor->set_shape(ov::Shape{2, 3}); } - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault(Return(mockInferrequest)); - ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestActual; - })); + EXPECT_CALL(*mockIExeNet.get(), create_infer_request()) + .WillOnce(Return(mockInferrequest)) + .WillOnce(Return(mockInferrequest_2)); + EXPECT_CALL(*mockIExeNetActual.get(), create_infer_request()) + .WillOnce(Return(mockInferrequestActual)) + .WillOnce(Return(mockInferrequestActual_2)); config.insert(ov::device::priorities(priorityList.as())); config.insert(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); std::shared_ptr exeNetwork; diff --git a/src/plugins/auto/tests/unit/release_helper_test.cpp b/src/plugins/auto/tests/unit/release_helper_test.cpp index b1631409090900..507127f036e47d 100644 --- a/src/plugins/auto/tests/unit/release_helper_test.cpp +++ b/src/plugins/auto/tests/unit/release_helper_test.cpp @@ -157,7 +157,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) { bool cpuSuccess; bool accSuccess; std::tie(cpuSuccess, accSuccess) = this->GetParam(); - size_t decreaseCount = 0; + size_t decreaseExeNetworkCount = 0; + size_t decreaseInferReqCount = 0; // test auto plugin 
plugin->set_device_name("AUTO"); const std::string strDevices = ov::test::utils::DEVICE_GPU + std::string(",") + ov::test::utils::DEVICE_CPU; @@ -188,8 +189,11 @@ TEST_P(AutoReleaseHelperTest, releaseResource) { ::testing::Matcher(StrEq(ov::test::utils::DEVICE_CPU)), _)) .WillByDefault(Return(mockExeNetwork)); - if (accSuccess) - decreaseCount++; + if (accSuccess) { + decreaseExeNetworkCount++; + // will be at least 2 infer requests for mocked CPU/GPU + decreaseInferReqCount += 2; + } } else { ON_CALL(*core, compile_model(::testing::Matcher&>(_), @@ -224,8 +228,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) { auto sharedcount = mockExeNetwork._ptr.use_count(); auto requestsharedcount = inferReqInternal.use_count(); std::this_thread::sleep_for(std::chrono::milliseconds(500)); - EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseCount); - EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseCount); + EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseExeNetworkCount); + EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseInferReqCount); if (cpuSuccess || accSuccess) { if (accSuccess) EXPECT_EQ(exeNetwork->get_property(ov::execution_devices.name()).as(), diff --git a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp index 58deda3b5cd719..113b933c89430e 100644 --- a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp +++ b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp @@ -164,6 +164,11 @@ TEST_P(AutoRuntimeFallback, releaseResource) { _)) .WillByDefault(ov::Throw("compile model error")); } + std::map>> inferRequests; + inferRequests["CPU"] = {}; + inferRequests["GPU.0"] = {}; + inferRequests["GPU.1"] = {}; + inferRequests["OTHER"] = {}; for (auto& deviceInfo : targetDevices) { std::string deviceName; bool ifThrow; @@ -171,23 +176,48 @@ TEST_P(AutoRuntimeFallback, releaseResource) { targetDev += deviceName; targetDev += ((deviceInfo == targetDevices.back()) ? 
"" : ","); if (deviceName == "CPU") { - mockInferrequest = std::make_shared(inferReqInternal, - mockExecutor, - nullptr, - ifThrow); - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() { - return mockInferrequest; + auto inferReqInternal_CPU_2 = std::make_shared(mockIExeNet); + auto inferRequest_2 = std::make_shared(inferReqInternal_CPU_2, + mockExecutor, + nullptr, + ifThrow); + auto inferRequest = std::make_shared(inferReqInternal, + mockExecutor, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([&inferRequests, deviceName]() { + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + // in case of passthrough model, we need to keep the infer request + inferRequests.at(deviceName).pop_back(); + } + return infer; }); } else if (deviceName == "GPU.0") { - mockInferrequestGPU_0 = - std::make_shared(inferReqInternalActual, + auto inferReqInternal_GPU_0_2 = + std::make_shared(mockIExeNetActual); + auto inferRequest_2 = + std::make_shared(inferReqInternal_GPU_0_2, mockExecutorGPU_0, nullptr, ifThrow); - ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestGPU_0; - })); + auto inferRequest = std::make_shared(inferReqInternalActual, + mockExecutorGPU_0, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetActual.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } else if (deviceName == "GPU.1") { if (generateWorkersFail) { mockInferrequestGPU_1 = @@ -197,25 +227,52 @@ TEST_P(AutoRuntimeFallback, releaseResource) { ifThrow); ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(ov::Throw("error")); } else { - mockInferrequestGPU_1 = - std::make_shared(inferReqInternalGPU_1, + auto inferRequest = std::make_shared(inferReqInternalGPU_1, + mockExecutorGPU_1, + nullptr, + ifThrow); + auto inferReqInternalGPU_1_2 = + std::make_shared(mockIExeNetGPU_1); + auto inferRequest_2 = + std::make_shared(inferReqInternalGPU_1_2, mockExecutorGPU_1, nullptr, ifThrow); - ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestGPU_1; - })); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } } else if (deviceName == "OTHER") { - mockInferrequestOTHER = std::make_shared(inferReqInternalOTHER, - mockExecutorOTHER, - nullptr, - ifThrow); - ON_CALL(*mockIExeNetOTHER.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - 
std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestOTHER; - })); + auto inferRequest = std::make_shared(inferReqInternalOTHER, + mockExecutorOTHER, + nullptr, + ifThrow); + auto inferReqInternalOTHER_2 = + std::make_shared(mockIExeNetOTHER); + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto inferRequest_2 = std::make_shared(inferReqInternalOTHER_2, + mockExecutorOTHER, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(inferRequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetOTHER.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } else { return; } @@ -319,6 +376,11 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { _)) .WillByDefault(ov::Throw("compile model error")); } + std::map>> inferRequests; + inferRequests["CPU"] = {}; + inferRequests["GPU.0"] = {}; + inferRequests["GPU.1"] = {}; + inferRequests["OTHER"] = {}; for (auto& deviceInfo : targetDevices) { std::string deviceName; bool ifThrow; @@ -330,8 +392,20 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { mockExecutor, nullptr, ifThrow); - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() { - return mockInferrequest; + auto inferReqInternal_CPU_2 = std::make_shared(mockIExeNet); + auto inferRequest_2 = std::make_shared(inferReqInternal_CPU_2, + mockExecutor, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(mockInferrequest); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([&inferRequests, deviceName]() { + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + // in case of passthrough model, we need to keep the infer request + inferRequests.at(deviceName).pop_back(); + } + return infer; }); } else if (deviceName == "GPU.0") { mockInferrequestGPU_0 = @@ -339,10 +413,24 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { mockExecutorGPU_0, nullptr, ifThrow); - ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); - return mockInferrequestGPU_0; - })); + auto inferReqInternal_GPU_0_2 = + std::make_shared(mockIExeNetActual); + auto inferRequest_2 = + std::make_shared(inferReqInternal_GPU_0_2, + mockExecutorGPU_0, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(mockInferrequestGPU_0); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetActual.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } else if (deviceName == "GPU.1") { if (generateWorkersFail) { mockInferrequestGPU_1 = @@ -357,10 +445,24 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { mockExecutorGPU_1, nullptr, ifThrow); - ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() { - std::this_thread::sleep_for(std::chrono::milliseconds(0)); 
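Editor's note: the AUTO unit tests above repeat one mocking pattern per device: pre-create two infer requests, return them from create_infer_request(), and never pop the last one so later calls still succeed. A condensed, self-contained sketch of that pool logic (FakeRequest is a hypothetical stand-in for the mocked request type):

#include <map>
#include <memory>
#include <string>
#include <vector>

struct FakeRequest {};

// Hands out requests from a per-device pool; the last element is intentionally kept so
// repeated create_infer_request() calls (e.g. for a passthrough model) still get a request.
std::shared_ptr<FakeRequest> next_request(
        std::map<std::string, std::vector<std::shared_ptr<FakeRequest>>>& pools,
        const std::string& device) {
    auto& pool = pools.at(device);
    auto request = pool.back();
    if (pool.size() > 1)
        pool.pop_back();
    return request;
}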
- return mockInferrequestGPU_1; - })); + auto inferReqInternalGPU_1_2 = + std::make_shared(mockIExeNetGPU_1); + auto inferRequest_2 = + std::make_shared(inferReqInternalGPU_1_2, + mockExecutorGPU_1, + nullptr, + ifThrow); + inferRequests[deviceName].push_back(mockInferrequestGPU_1); + inferRequests[deviceName].push_back(inferRequest_2); + ON_CALL(*mockIExeNetGPU_1.get(), create_infer_request()) + .WillByDefault(InvokeWithoutArgs([&inferRequests, deviceName]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + auto infer = inferRequests.at(deviceName).back(); + if (inferRequests.at(deviceName).size() > 1) { + inferRequests.at(deviceName).pop_back(); + } + return infer; + })); } } } diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 10f7b485bc0a16..cb1324e7435703 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -712,27 +712,50 @@ void GraphOptimizer::FuseFCAndConvertOnWeights(Graph& graph) { // This optimization fuses Convert (fp16 -> bf16/fp32) on weights directly to FC input to allow precision conversion // handling based on internal logic (e.g. fuse conversion with weights reordering) + + auto isSuitableTranspose = [](const NodePtr& node) { + return node->getType() == Type::Transpose && node->getChildEdges().size() == 1 && node->isConstant(); + }; + auto isSuitableConvert = [&](const NodePtr& node) { + return node->getType() == Type::Convert && node->isConstant() && + one_of(node->getOriginalInputPrecisionAtPort(0), ov::element::f16, ov::element::bf16) && + one_of(node->getOriginalOutputPrecisionAtPort(0), ov::element::f32, ov::element::bf16); + }; + auto& graphNodes = graph.GetNodes(); for (const auto& fullyConnected : graphNodes) { if (fullyConnected->getType() != Type::FullyConnected) { continue; } - const auto convert = fullyConnected->getParentEdgeAt(1)->getParent(); - if (convert->getType() != Type::Convert || - !one_of(convert->getOriginalInputPrecisionAtPort(0), ov::element::f16, ov::element::bf16) || - !one_of(convert->getOriginalOutputPrecisionAtPort(0), ov::element::f32, ov::element::bf16) || - !convert->isConstant()) { - continue; + + NodePtr transpose = nullptr; + auto parent = fullyConnected->getParentEdgeAt(1)->getParent(); + if (parent->getType() == Type::Transpose) { + if (!isSuitableTranspose(parent)) + continue; + + transpose = parent; + parent = transpose->getParentEdgeAt(0)->getParent(); } + const auto convert = parent; + if (!isSuitableConvert(convert)) + continue; + const auto weights = convert->getParentEdgeAt(0)->getParent(); const auto weights_out_edge = weights->getChildEdges()[0].lock(); - const auto fc_weights_path_edge = fullyConnected->getParentEdgeAt(1); + const auto fc_weights_path_edge = + transpose ? transpose->getParentEdgeAt(0) : fullyConnected->getParentEdgeAt(1); const auto inNum = weights_out_edge->getInputNum(); const auto outNum = fc_weights_path_edge->getOutputNum(); - fullyConnected->setOriginalInputPrecisionAtPort(1, convert->getOriginalInputPrecisionAtPort(0)); + const auto originalPrecision = convert->getOriginalInputPrecisionAtPort(0); + fullyConnected->setOriginalInputPrecisionAtPort(1, originalPrecision); + if (transpose) { + transpose->setOriginalInputPrecisionAtPort(0, originalPrecision); + transpose->setOriginalOutputPrecisionAtPort(0, originalPrecision); + } graph.RemoveEdge(fc_weights_path_edge); - graph.CreateEdge(weights, fullyConnected, inNum, outNum); + graph.CreateEdge(weights, transpose ? 
transpose : fullyConnected, inNum, outNum); if (convert->getChildEdges().empty()) { graph.DropNode(convert); } diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp index bf9fb16f8dab7c..0f2252fd5d256f 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp @@ -36,12 +36,8 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { // So in case of adding new operations that takes matmul inputs we need keep update fc_input_a and fc_input_b. auto fc_input_a = pattern_map.at(activations_m); auto fc_input_b = pattern_map.at(weights_m); - bool is_convert = false; if (auto convert_node = ov::as_type_ptr(fc_input_b.get_node_shared_ptr())) { - if (is_decompression(convert_node)) { - is_convert = true; - fc_input_b = convert_node->get_input_node_shared_ptr(0); - } else { + if (!is_decompression(convert_node)) { return false; } } @@ -151,14 +147,6 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { fc_input_a = create_transpose(fc_input_a, matmul->get_friendly_name() + "/transpose_a"); } - // Connect Convert to new input if needed - if (is_convert) { - auto convert = pattern_map.at(weights_m).get_node_shared_ptr(); - convert->input(0).replace_source_output(fc_input_b); - convert->validate_and_infer_types(); - fc_input_b = convert; - } - auto bias = std::make_shared(element::undefined, Shape{0}); new_ops.push_back(bias); diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp index 383385e9e5c1db..aa68ca17db7375 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/matmul_decompress_convert.cpp @@ -222,17 +222,18 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface, // input shapes - std::pair, // transposeA, transposeB - ElementType, // weights precision - ov::AnyMap, // additional property - CPUSpecificParams>; class MatMulDecompressConvertTest2 : public MatMulDecompressConvertTest { protected: @@ -519,5 +515,144 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16_2, } // namespace + +/* This test covers NNCF-case when decompression convert has not only MatMul consumer. 
+ * Graph before: + ------------ --------------- + |Input(f32)| |Constant(f16)| + ------------ --------------- + | | + | --------------------------------- + | |Convert(decompression f16->f32)| + | --------------------------------- + | | | + ---------------------------- ----------------------- + |MatMul (transposed_b=true)| | Result | + ---------------------------- ----------------------- + | + ----------------------- + | Result | + ----------------------- + + * Exec graph: + ------------ ----------------------------- + |Input(f32)| | Constant(f16) | + ------------ ----------------------------- + | | | + | ------------- --------------------- + | | Transpose | | Convert(f16->f32) | + | ------------- --------------------- + | | | + ----------------------- ----------------------- + | FullyConnected | | Result | + ----------------------- ----------------------- + | + ----------------------- + | Result | + ----------------------- +*/ + +class MatMulDecompressConvertTest3 : public MatMulDecompressConvertTest { +protected: + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + + std::vector inputShapes; + std::pair transpose; + ElementType weiConstElemType; + ov::AnyMap additionalConfig; + CPUSpecificParams cpuParams; + + std::tie(inputShapes, transpose, weiConstElemType, additionalConfig, cpuParams) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + init_input_shapes(inputShapes); + + bool transpA = transpose.first; + bool transpB = transpose.second; + + if (transpA) + transposeCount++; + if (!transpB) + transposeCount++; + + if (transpA) { + transpose_shape(inputDynamicShapes[0]); + for (auto& shapes : targetStaticShapes) { + transpose_shape(shapes[0]); + } + } + if (transpB) { + transpose_shape(inputDynamicShapes[1]); + for (auto& shapes : targetStaticShapes) { + transpose_shape(shapes[1]); + } + } + + const auto& inShapeA = inputDynamicShapes[0]; + const auto& inShapeB = inputDynamicShapes[1]; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + ElementType netType = ElementType::f32; + ElementType convertOutType = ElementType::f32; + inType = outType = netType; + + std::string cpuNodeType = "FullyConnected"; + selectedType = makeSelectedTypeStr(selectedType, outType); + + ov::ParameterVector params{std::make_shared(inType, inShapeA)}; + std::shared_ptr inputB = ov::test::utils::make_constant(weiConstElemType, inShapeB.get_shape()); + inputB = std::make_shared(inputB, convertOutType); + mark_as_decompression(inputB); + expectedWeiConstElemType = weiConstElemType; + convertCount = 1; + + auto matMul = std::make_shared(params[0], inputB, transpA, transpB); + auto result0 = std::make_shared(matMul); + auto result1 = std::make_shared(inputB); + result1->set_friendly_name("ConstantResult"); + + modifyGraph(netType, params, matMul); + function = std::make_shared(ov::ResultVector{result0, result1}, params, "MatMulDecompressed3"); + } + + void check_execution_graph() override { + MatMulDecompressConvertTest::check_execution_graph(); + + // Check that Result has correct shape: the same as origin Constant + const auto results = compiledModel.outputs(); + const auto result_it = std::find_if(results.cbegin(), results.cend(), + [](const ov::Output& out) { + return out.get_node()->get_friendly_name() == "ConstantResult"; + }); + ASSERT_NE(result_it, results.cend()) + << "Target Result has not been found!"; + ASSERT_EQ(result_it->get_partial_shape(), inputDynamicShapes[1]) + << "Target Result has not origin shape. 
It has: " << result_it->get_partial_shape() << " but should have origin: " << inputDynamicShapes[1]; + } +}; + +TEST_P(MatMulDecompressConvertTest3, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + run(); + check_execution_graph(); +} + +namespace { +const auto testParams2D_FP16_3_smoke = + ::testing::Combine(::testing::Values(static_shapes_to_test_representation({{1, 16, 32}, {32, 64}})), + ::testing::Values(std::pair{false, false}), + ::testing::Values(ElementType::f16), + ::testing::Values(emptyConfig), + ::testing::ValuesIn(filter_specific_params(false))); + +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16_3, + MatMulDecompressConvertTest3, + testParams2D_FP16_3_smoke, + MatMulDecompressConvertTest3::getTestCaseName); + +} // namespace + } // namespace test } // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp index 3d2ab245d54c22..a7ed7296281c8f 100644 --- a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp @@ -461,13 +461,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_0) { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1}); + auto convert = std::make_shared(input2, ov::element::f32); auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); - auto transpose = std::make_shared(input2, transpose_constant); - auto convert = std::make_shared(transpose, ov::element::f32); + auto transpose = std::make_shared(convert, transpose_constant); auto matmul = std::make_shared( input1, - convert, + transpose, std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); @@ -491,13 +491,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_1) { auto transpose1 = std::make_shared(input1, transpose_constant1); auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1}); + auto convert = std::make_shared(input2, ov::element::f32); auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); - auto transpose2 = std::make_shared(input2, transpose_constant2); - auto convert = std::make_shared(transpose2, ov::element::f32); + auto transpose2 = std::make_shared(convert, transpose_constant2); auto matmul = std::make_shared( transpose1, - convert, + transpose2, std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp index a40c7dfebb9de6..2c455fb8f7e937 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp @@ -90,7 +90,7 @@ bool mark_shape_of_subgraphs::can_mark_node(const program_node& node) { // skip mark_node for broadcast node if dependency nodes are data and shape_of auto& dependencies = node.get_dependencies(); if (node.is_type() && dependencies.size() == 2) { - if (dependencies[0].first->is_type() && dependencies[1].first->is_type()) + if (dependencies[0].first->is_type() && 
dependencies[1].first->is_type() && (dependencies[1].first->get_users().size() == 1)) return false; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 28796cc1fcc83c..2120a1308ea290 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -16,6 +16,7 @@ #include "gemm_inst.h" #include "lrn_inst.h" #include "mvn_inst.h" +#include "rms_inst.h" #include "pooling_inst.h" #include "normalize_inst.h" #include "permute_inst.h" @@ -764,6 +765,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { should_fuse |= input.is_type(); + should_fuse |= input.is_type(); + should_fuse |= input.is_type(); should_fuse |= input.is_type() && data_type_traits::is_i8_u8(input.get_input_layout(0).data_type); @@ -964,6 +967,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { (parents[i].first->is_type() && mvn_supports_fusings(parents[i].first->as())) || (parents[i].first->is_type()) || + (parents[i].first->is_type()) || (parents[i].first->is_type()) || (parents[i].first->is_type()) || (parents[i].first->is_type()) || diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl index 605efedd381c43..cd4bc4349ed2f6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl @@ -28,7 +28,11 @@ KERNEL(rms_gpu_bfyx_opt)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, const __global INPUT1_TYPE* gamma, - __global OUTPUT_TYPE* output) + __global OUTPUT_TYPE* output + #if HAS_FUSED_OPS_DECLS + , FUSED_OPS_DECLS + #endif +) { const uint data_idx = get_global_id(1); const uint in_data_idx = get_global_id(0); @@ -100,6 +104,26 @@ KERNEL(rms_gpu_bfyx_opt)( rms = slm_buf[0]; + #if HAS_FUSED_OPS + uint b, f, z, y, x; + #if INPUT_RANK == 1 + f = z = y = x = 1; + #elif INPUT_RANK == 2 + z = y = x = 1; + b = data_idx; + #elif INPUT_RANK == 3 + x = 1; + f = data_idx % OUTPUT_FEATURE_NUM; + b = data_idx / OUTPUT_FEATURE_NUM; + #else + x = data_idx; + y = x % OUTPUT_SIZE_Y; x = x / OUTPUT_SIZE_Y; + z = x % OUTPUT_SIZE_Z; x = x / OUTPUT_SIZE_Z; + f = x % OUTPUT_FEATURE_NUM; x = x / OUTPUT_FEATURE_NUM; + b = x % OUTPUT_BATCH_NUM; x = x / OUTPUT_BATCH_NUM; + #endif + #endif + i = 0; if ((workers_per_data > SUB_GROUP_SIZE) && USE_BLOCK_WRITE) { @@ -107,11 +131,26 @@ KERNEL(rms_gpu_bfyx_opt)( { ACC_TYPE vec_gamma = TO_ACC_TYPE(BLOCK_READ(gamma, subgroup_offset + i * get_sub_group_size())); OUTPUT_VEC_TYPE vec_tmp; + #if HAS_FUSED_OPS + LAST_DIM = subgroup_offset + i * get_sub_group_size() + get_sub_group_local_id(); + #endif #if SUBGROUP_BLOCK_SIZE == 1 - vec_tmp = TO_OUTPUT_TYPE(rms * data[i] * vec_gamma); + OUTPUT_TYPE normalized = TO_OUTPUT_TYPE(rms * data[i] * vec_gamma); + #if HAS_FUSED_OPS + FUSED_OPS; + normalized = FUSED_OPS_RESULT; + #endif + vec_tmp = normalized; #else - unroll_for (int j = 0; j < SUBGROUP_BLOCK_SIZE; j++) - vec_tmp[j] = TO_OUTPUT_TYPE(rms * data[i + j] * vec_gamma[j]); + unroll_for (int j = 0; j < SUBGROUP_BLOCK_SIZE; j++) { + OUTPUT_TYPE normalized = TO_OUTPUT_TYPE(rms * data[i + j] * vec_gamma[j]); + #if HAS_FUSED_OPS + LAST_DIM += j * get_sub_group_size(); + FUSED_OPS; + normalized = FUSED_OPS_RESULT; + #endif + 
vec_tmp[j] = normalized; + } #endif BLOCK_WRITE(output, data_offset + subgroup_offset + i * get_sub_group_size(), vec_tmp); } @@ -120,13 +159,25 @@ KERNEL(rms_gpu_bfyx_opt)( for (; i < items_num; i++) { ACCUMULATOR_TYPE temp = TO_ACCUMULATOR_TYPE(gamma[subgroup_offset + get_sub_group_local_id() + i * get_sub_group_size()]); - output[data_offset + subgroup_offset + get_sub_group_local_id() + i * get_sub_group_size()] = TO_OUTPUT_TYPE(rms * data[i] * temp); + OUTPUT_TYPE normalized = TO_OUTPUT_TYPE(rms * data[i] * temp); + #if HAS_FUSED_OPS + LAST_DIM = subgroup_offset + get_sub_group_local_id() + i * get_sub_group_size(); + FUSED_OPS; + normalized = FUSED_OPS_RESULT; + #endif + output[data_offset + subgroup_offset + get_sub_group_local_id() + i * get_sub_group_size()] = normalized; } if (in_data_idx < leftovers) { ACCUMULATOR_TYPE temp = TO_ACCUMULATOR_TYPE(gamma[workers_per_data * items_num + in_data_idx]); - output[data_offset + workers_per_data * items_num + in_data_idx] = TO_OUTPUT_TYPE(rms * data[items_num] * temp); + OUTPUT_TYPE normalized = TO_OUTPUT_TYPE(rms * data[items_num] * temp); + #if HAS_FUSED_OPS + LAST_DIM = workers_per_data * items_num + in_data_idx; + FUSED_OPS; + normalized = FUSED_OPS_RESULT; + #endif + output[data_offset + workers_per_data * items_num + in_data_idx] = normalized; } } #undef USE_BLOCK_WRITE diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl index 88c5eb520d33e3..44c5540a79ccc3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl @@ -8,7 +8,11 @@ KERNEL(rms_gpu_ref)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, const __global INPUT1_TYPE* gamma, - __global OUTPUT_TYPE* output) + __global OUTPUT_TYPE* output + #if HAS_FUSED_OPS_DECLS + , FUSED_OPS_DECLS + #endif +) { const uint b = get_global_id(0); const uint f = get_global_id(1); @@ -38,6 +42,10 @@ KERNEL(rms_gpu_ref)( const uint gamma_idx = z; #endif OUTPUT_TYPE result = TO_OUTPUT_TYPE(rms) * TO_OUTPUT_TYPE(input[input_idx]) * TO_OUTPUT_TYPE(gamma[gamma_idx]); + #if HAS_FUSED_OPS + FUSED_OPS; + result = FUSED_OPS_RESULT; + #endif output[output_idx] = result; } } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp index 9f57d8a78121a6..3ef083e545adae 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp @@ -97,6 +97,35 @@ JitConstants RMSKernelBfyxOpt::GetJitConstants(const rms_params& params, Dispatc } jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", subgroup_size)); jit.AddConstant(MakeJitConstant("SUBGROUP_BLOCK_SIZE", dispatchData.subgroupBlockSize)); + if (!params.fused_ops.empty()) { + jit.AddConstant(MakeJitConstant("INPUT_RANK", params.ov_input_rank)); + switch (params.ov_input_rank) { + case 1 : + jit.AddConstant(MakeJitConstant("LAST_DIM", "b")); + break; + case 2 : + jit.AddConstant(MakeJitConstant("LAST_DIM", "f")); + break; + case 3 : + jit.AddConstant(MakeJitConstant("LAST_DIM", "y")); + break; + default: + jit.AddConstant(MakeJitConstant("LAST_DIM", "x")); + break; + } + + std::vector idx_order; + if (params.inputs[0].GetDims().size() == 5) { + idx_order = { "(b)", "(f)", "(z)", "(y)", "(x)" }; + } else if (params.inputs[0].GetDims().size() 
<= 4) { + idx_order = { "(b)", "(f)", "(y)", "(x)" }; + } else { + OPENVINO_THROW("rms_bfyx_opt doesn't support 6D or higher dims."); + } + + auto conf = FusedOpsConfiguration("", idx_order, "normalized", params.outputs[0].GetDType(), 1); + jit.Merge(MakeFusedOpsJitConstants(params, { conf })); + } return jit; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h index 00e12e44a43979..01b882276a7430 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h @@ -18,6 +18,13 @@ class RMSKernelBfyxOpt : public RMSKernelBase { ParamsKey GetSupportedKey() const override; protected: + std::vector GetSupportedFusedOps() const override { + return { + FusedOpType::ACTIVATION, + FusedOpType::QUANTIZE, + FusedOpType::ELTWISE + }; + } bool Validate(const Params&) const override; DispatchData SetDefault(const rms_params& params) const override; JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const override; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp index d3923988f42143..d554b5a707256a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp @@ -25,6 +25,26 @@ ParamsKey RMSKernelRef::GetSupportedKey() const { return k; } +JitConstants RMSKernelRef::GetJitConstants(const rms_params& params, DispatchData dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); + + if (!params.fused_ops.empty()) { + std::vector idx_order; + if (params.inputs[0].GetDims().size() == 5) { + idx_order = { "(b)", "(f)", "(z)", "(y)", "(x)" }; + } else if (params.inputs[0].GetDims().size() <= 4) { + idx_order = { "(b)", "(f)", "(y)", "(x)" }; + } else { + OPENVINO_THROW("rms_ref doesn't support 6D or higher dims."); + } + + auto conf = FusedOpsConfiguration("", idx_order, "result", params.outputs[0].GetDType(), 1); + jit.Merge(MakeFusedOpsJitConstants(params, { conf })); + } + + return jit; +} + KernelsData RMSKernelRef::GetKernelsData(const Params& params) const { return GetCommonKernelsData(params); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h index 913055fca1f8b4..b0f90ebf65ce4f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h @@ -16,5 +16,15 @@ class RMSKernelRef : public RMSKernelBase { KernelsData GetKernelsData(const Params& params) const override; KernelsPriority GetKernelsPriority(const Params& params) const override; ParamsKey GetSupportedKey() const override; + +protected: + std::vector GetSupportedFusedOps() const override { + return { + FusedOpType::ACTIVATION, + FusedOpType::QUANTIZE, + FusedOpType::ELTWISE + }; + } + JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const override; }; } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp b/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp index f8f14102eb9f6b..36e802a59d1884 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp +++ 
b/src/plugins/intel_gpu/src/plugin/ops/paged_attention.cpp @@ -48,7 +48,6 @@ static void CreatePagedAttentionExtensionOp(ProgramBuilder& p, const std::shared const size_t scale_idx = 9; const size_t alibi_idx = 11; - const size_t rotated_block_indices_idx = 13; std::shared_ptr scale_const = ov::as_type_ptr(op->get_input_node_shared_ptr(scale_idx)); if (scale_const) { @@ -65,11 +64,6 @@ static void CreatePagedAttentionExtensionOp(ProgramBuilder& p, const std::shared prim.num_outputs = 1; - std::shared_ptr rotated_block_indices_const = - ov::as_type_ptr(op->get_input_node_shared_ptr(rotated_block_indices_idx)); - OPENVINO_ASSERT(rotated_block_indices_const != nullptr); - prim.has_rotated_blocks = ov::shape_size(rotated_block_indices_const->get_output_shape(0)) > 0; - if (op->get_output_size() > 1) { const auto scores_output_idx = 1; const auto& users = op->get_output_target_inputs(scores_output_idx); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 62dcfcb6ad7c18..c893e14f193a93 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -1055,9 +1055,11 @@ void TransformationsPipeline::apply(std::shared_ptr func) { if (device_info.supports_immad) { auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); pass_config->set_callback([=](const_node_ptr& root) -> bool { - if (root->get_input_node_shared_ptr(0)->get_element_type() == ov::element::Type_t::f32) { - GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: input type is not supported" << std::endl; - return true; + for (size_t i = 0 ; i < root->get_input_node_shared_ptr(0)->get_output_size(); ++i) { + if (root->get_input_node_shared_ptr(0)->get_output_element_type(i) == ov::element::Type_t::f32) { + GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: input type is not supported" << std::endl; + return true; + } } auto weight_shape = root->get_input_partial_shape(1); diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp index 1540709023a4a9..9618ff17990cd9 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp @@ -536,6 +536,7 @@ class fc_int8_inputs_fused_fp32_sum : public FullyConnectedFusingTestOneDNN { }; TEST_P(fc_int8_inputs_fused_fp32_sum, basic) { + GTEST_SKIP(); run_test(false); } @@ -571,10 +572,12 @@ class fc_fp16_eltwise_add : public FullyConnectedFusingTestOneDNN { }; TEST_P(fc_fp16_eltwise_add, basic) { + GTEST_SKIP(); run_test(false); } TEST_P(fc_fp16_eltwise_add, basic_cached) { + GTEST_SKIP(); run_test(true); } @@ -740,6 +743,7 @@ class fc_fp16_eltwise_sub : public FullyConnectedFusingTestOneDNN { }; TEST_P(fc_fp16_eltwise_sub, basic) { + GTEST_SKIP(); run_test(false); } @@ -775,6 +779,7 @@ class fc_fp16_eltwise_prod : public FullyConnectedFusingTestOneDNN { }; TEST_P(fc_fp16_eltwise_prod, basic) { + GTEST_SKIP(); run_test(false); } @@ -810,6 +815,7 @@ class fc_fp16_eltwise_sum : public FullyConnectedFusingTestOneDNN { }; TEST_P(fc_fp16_eltwise_sum, basic) { + GTEST_SKIP(); run_test(false); } @@ -827,6 +833,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_fp16_eltwise_sum, ::testing::ValuesIn(s class fc_fp32_activation_prelu : public 
FullyConnectedFusingTestOneDNN {}; TEST_P(fc_fp32_activation_prelu, basic) { + GTEST_SKIP(); auto p = GetParam(); create_topologies( input_layout("input", get_input_layout(p)), diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp index eecb0d5c8241b9..1fa303656f80a5 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp @@ -269,6 +269,7 @@ TEST_P(gemm_2in_scale, basic) { } TEST_P(gemm_2in_scale, fp16_scale_out) { + GTEST_SKIP(); auto p = GetParam(); create_topologies( input_layout("input0", get_input_layout(p, 0)), @@ -299,6 +300,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_scale, ::testing::ValuesIn(std::v class gemm_2in_add : public GemmFusingTest {}; TEST_P(gemm_2in_add, eltwise_postop_static) { + GTEST_SKIP(); auto p = GetParam(); if (engine.get_device_info().supports_immad) { @@ -331,6 +333,7 @@ TEST_P(gemm_2in_add, eltwise_postop_static) { } TEST_P(gemm_2in_add, eltwise_postop_dynamic) { + GTEST_SKIP(); auto p = GetParam(); if (engine.get_device_info().supports_immad) { @@ -367,6 +370,7 @@ TEST_P(gemm_2in_add, eltwise_postop_dynamic) { } TEST_P(gemm_2in_add, eltwise_postop_cached) { + GTEST_SKIP(); auto p = GetParam(); if (engine.get_device_info().supports_immad) { @@ -530,6 +534,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_act_scale_quantize_i8, ::testing: class gemm_2in_act_scale_quantize_eltwise_i8 : public GemmFusingTest {}; TEST_P(gemm_2in_act_scale_quantize_eltwise_i8, basic) { + GTEST_SKIP(); auto p = GetParam(); create_topologies( input_layout("input0", get_input_layout(p, 0)), diff --git a/src/plugins/intel_gpu/tests/unit/fusions/rms_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/rms_fusion_test.cpp new file mode 100644 index 00000000000000..46df57a0267e33 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/fusions/rms_fusion_test.cpp @@ -0,0 +1,126 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" +#include "fusion_test_common.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +using namespace cldnn; +using namespace ::tests; + +namespace { +struct rms_test_params { + tensor input_size; + tensor gamma_size; + tensor elwise_size; + data_types input_type; + format input_format; + size_t expected_fused_primitives; + size_t expected_fused_primitives_onednn; + size_t expected_not_fused_primitives; +}; + +class RMSFusingTest : public ::BaseFusingTest { +public: + void execute(rms_test_params& p) { + if (engine.get_device_info().supports_immad) + p.expected_fused_primitives = p.expected_fused_primitives_onednn; + auto input_prim = get_mem(get_input_layout(p)); + auto gamma_prim = get_mem(get_gamma_layout(p)); + + network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); + network network_fused(this->engine, this->topology_fused, cfg_fused); + + network_fused.set_input_data("input", input_prim); + network_fused.set_input_data("gamma", gamma_prim); + network_not_fused.set_input_data("input", input_prim); + network_not_fused.set_input_data("gamma", gamma_prim); + + compare(network_not_fused, network_fused, p); + } + + layout get_input_layout(rms_test_params& p) { + return layout{ p.input_type, p.input_format, p.input_size }; + } + + layout get_gamma_layout(rms_test_params& p) { + return layout{ p.input_type, p.input_format, p.gamma_size }; + } +}; +} // namespace + 
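For reference, a minimal sketch of what one parameter entry used by the instantiations below aggregates: the three tensor sizes plus data type and format come from the CASE_RMS_* macros defined next, and the trailing integers are the expected primitive counts for the fused graph, the fused graph on oneDNN, and the non-fused graph.

// Sketch only: what rms_test_params{ CASE_RMS_F32_1, 3, 3, 4 } expands to.
rms_test_params p{
    { 1, 16, 8, 8 },   // input_size
    { 1, 1, 1, 8 },    // gamma_size
    { 1, 16, 8, 8 },   // elwise_size
    data_types::f32,   // input_type
    format::bfyx,      // input_format
    3,                 // expected_fused_primitives
    3,                 // expected_fused_primitives_onednn
    4                  // expected_not_fused_primitives
};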
+ +/* ----------------------------------------------------------------------------------------------------- */ +/* --------------------------------------- RMS cases --------------------------------------------------- */ +/* ----------------------------------------------------------------------------------------------------- */ + +#define CASE_RMS_F32_1 { 1, 16, 8, 8 }, { 1, 1, 1, 8 }, { 1, 16, 8, 8 }, data_types::f32, format::bfyx +#define CASE_RMS_F32_2 { 2, 16, 8, 8 }, { 1, 1, 1, 8 }, { 2, 16, 8, 8 }, data_types::f32, format::bfyx +#define CASE_RMS_3D_F32_1 { 1, 16, 8, 8, 8 }, { 1, 1, 1, 1, 8 }, { 1, 16, 8, 8, 8 }, data_types::f32, format::bfzyx +#define CASE_RMS_3D_F32_2 { 2, 16, 8, 8, 8 }, { 1, 1, 1, 1, 8 }, { 2, 16, 8, 8, 8 }, data_types::f32, format::bfzyx +#define CASE_RMS_F16_1 { 1, 16, 8, 8 }, { 1, 1, 1, 8 }, { 1, 16, 8, 8 }, data_types::f16, format::bfyx +#define CASE_RMS_F16_2 { 2, 16, 8, 8 }, { 1, 1, 1, 8 }, { 2, 16, 8, 8 }, data_types::f16, format::bfyx +#define CASE_RMS_3D_F16_1 { 1, 16, 8, 8, 8 }, { 1, 1, 1, 1, 8 }, { 1, 16, 8, 8, 8 }, data_types::f16, format::bfzyx +#define CASE_RMS_3D_F16_2 { 2, 16, 8, 8, 8 }, { 1, 1, 1, 1, 8 }, { 2, 16, 8, 8, 8 }, data_types::f16, format::bfzyx + +class rms_activation : public RMSFusingTest {}; +TEST_P(rms_activation, basic) { + auto p = GetParam(); + create_topologies( + input_layout("input", get_input_layout(p)), + input_layout("gamma", get_gamma_layout(p)), + rms("rms", input_info("input"), input_info("gamma"), 1e-10f), + activation("act", input_info("rms"), activation_func::relu), + reorder("reorder_bfyx", input_info("act"), format::bfyx, data_types::f32) + ); + + tolerance = (p.input_type == data_types::f32) ? 1e-5f : 0.1f; + execute(p); +} + +INSTANTIATE_TEST_SUITE_P(fusings_gpu, rms_activation, ::testing::ValuesIn(std::vector{ + rms_test_params{ CASE_RMS_F32_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_F32_2, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F32_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F32_2, 3, 3, 4 }, + rms_test_params{ CASE_RMS_F16_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_F16_2, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F16_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F16_2, 3, 3, 4 }, +})); + +class rms_eltwise : public RMSFusingTest {}; +TEST_P(rms_eltwise, basic) { + auto p = GetParam(); + create_topologies( + input_layout("input", layout{ p.input_type, p.input_format, p.input_size }), + input_layout("gamma", layout{ p.input_type, p.input_format, p.gamma_size }), + rms("rms", input_info("input"), input_info("gamma"), 1e-10f), + data("eltw_data", get_mem(layout{ p.input_type, p.input_format, p.elwise_size })), + eltwise("eltw", { input_info("rms"), input_info("eltw_data") }, eltwise_mode::sum, p.input_type), + reorder("reorder_bfyx", input_info("eltw"), p.input_format, data_types::f32) + ); + + tolerance = (p.input_type == data_types::f32) ? 
1e-5f : 0.1f; + execute(p); +} + +INSTANTIATE_TEST_SUITE_P(fusings_gpu, rms_eltwise, ::testing::ValuesIn(std::vector{ + rms_test_params{ CASE_RMS_F32_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_F32_2, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F32_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F32_2, 3, 3, 4 }, + rms_test_params{ CASE_RMS_F16_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_F16_2, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F16_1, 3, 3, 4 }, + rms_test_params{ CASE_RMS_3D_F16_2, 3, 3, 4 }, +})); diff --git a/src/plugins/intel_gpu/tests/unit/passes/clamp_fp16_output_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/clamp_fp16_output_test.cpp index 30b9cd01a2365f..12bc580242e807 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/clamp_fp16_output_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/clamp_fp16_output_test.cpp @@ -20,6 +20,7 @@ using namespace cldnn; using namespace ::tests; TEST(clamp_fp16_output_test, test_gemm_softmax_simple) { + GTEST_SKIP(); auto& engine = get_test_engine(); ov::Shape in1_shape = { 1, 1, 3, 4 }; ov::Shape in2_shape = { 1, 4 }; @@ -78,6 +79,7 @@ TEST(clamp_fp16_output_test, test_gemm_softmax_simple) { } TEST(clamp_fp16_output_test, test_gemm_softmax_mult_fused) { + GTEST_SKIP(); auto& engine = get_test_engine(); ov::Shape in1_shape = { 1, 1, 3, 4 }; ov::Shape in2_shape = { 1, 4 }; diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp index 01cdd3c31d7a0a..c16b17e20a6d05 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp @@ -916,6 +916,7 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_unsqueeze) { } TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_squeeze_crop_axis) { + GTEST_SKIP(); auto& engine = get_test_engine(); tests::random_generator rg(GET_SUITE_NAME); diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp index cd400128a55234..3f3b6019611e3a 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp @@ -113,6 +113,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_legal) { } TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal) { + GTEST_SKIP(); auto& engine = get_test_engine(); auto weights = engine.allocate_memory({ ov::PartialShape{ 2, 10 }, data_types::u8, format::bfyx }); auto in_layout = layout{ ov::PartialShape::dynamic(2), data_types::u8, format::bfyx }; @@ -165,6 +166,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal) { } TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_const) { + GTEST_SKIP(); auto& engine = get_test_engine(); auto weights = engine.allocate_memory({ ov::PartialShape{ 2, 10 }, data_types::u8, format::bfyx }); auto in_layout = layout{ ov::PartialShape::dynamic(2), data_types::u8, format::bfyx }; @@ -538,6 +540,7 @@ TEST(prepare_primitive_fusing, fuse_constant_transposes_removal_check) { } TEST(prepare_primitive_fusing, fuse_constant_transposes_accuracy_test) { + GTEST_SKIP(); auto& engine = get_test_engine(); auto input = engine.allocate_memory({ { 2, 32 }, data_types::f16, format::bfyx }); @@ -587,6 +590,7 @@ TEST(prepare_primitive_fusing, fuse_constant_transposes_accuracy_test) { } TEST(prepare_primitive_fusing, 
can_profiling_data_when_fuse_illegal) { + GTEST_SKIP(); auto& engine = get_test_engine(); auto weights = engine.allocate_memory({ov::PartialShape{2, 10}, data_types::u8, format::bfyx}); auto in_layout = layout{ov::PartialShape::dynamic(2), data_types::u8, format::bfyx}; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index bbfe7224b4a328..30d12c490e3d15 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -4645,6 +4645,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio } TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activations_per_channel_dynamic) { + GTEST_SKIP(); auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::u8, format::bfyx, {1, 2, 5, 4} }); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index ece38da37b2258..87d4c4ed7f0a2d 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -3175,6 +3175,7 @@ INSTANTIATE_TEST_SUITE_P( ); TEST_P(fully_connected_random_test_f16, basic) { + GTEST_SKIP(); run_test(); } @@ -3353,6 +3354,7 @@ INSTANTIATE_TEST_SUITE_P( ); TEST_P(fully_connected_random_test_i8_3d, basic) { + GTEST_SKIP(); run_test(); } @@ -3662,26 +3664,32 @@ using fully_connected_u8_u8_test = fc_quantized_random_test; using fully_connected_u8_f32_test = fc_quantized_random_test; TEST_P(fully_connected_i8_i8_test, random) { + GTEST_SKIP(); run_random_test(); } TEST_P(fully_connected_i8_u8_test, random) { + GTEST_SKIP(); run_random_test(); } TEST_P(fully_connected_i8_f32_test, random) { + GTEST_SKIP(); run_random_test(); } TEST_P(fully_connected_u8_i8_test, random) { + GTEST_SKIP(); run_random_test(); } TEST_P(fully_connected_u8_u8_test, random) { + GTEST_SKIP(); run_random_test(); } TEST_P(fully_connected_u8_f32_test, random) { + GTEST_SKIP(); run_random_test(); } @@ -4004,6 +4012,7 @@ TEST_F(fully_connected_gpu_tests, compressed_scale_zp_bias_cached) { } TEST_F(fully_connected_gpu_tests, compressed_int4_scale) { + GTEST_SKIP(); this->test_compressed_int4_scale(false, false, 256); } @@ -4036,6 +4045,7 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_reuse_scale) { } TEST_F(fully_connected_gpu_tests, compressed_int4_scale_cached) { + GTEST_SKIP(); this->test_compressed_int4_scale(true, false, 256); } @@ -4052,6 +4062,7 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_cached) { } TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_b1g32) { + GTEST_SKIP(); this->test_compressed_int4_scale(false, true, 1, 32); } @@ -4060,18 +4071,22 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_b48g32) { } TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_b1g64) { + GTEST_SKIP(); this->test_compressed_int4_scale(false, true, 1, 64); } TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_b1g128) { + GTEST_SKIP(); this->test_compressed_int4_scale(false, true, 1, 128); } TEST_F(fully_connected_gpu_tests, compressed_int4_scale_b1g32) { + GTEST_SKIP(); this->test_compressed_int4_scale(false, false, 1, 32); } TEST_F(fully_connected_gpu_tests, compressed_int4_scale_b1g64) { + GTEST_SKIP(); 
this->test_compressed_int4_scale(false, false, 1, 64); } @@ -4263,6 +4278,7 @@ TEST_F(fully_connected_gpu_tests, compressed_int8_scale_zp_scalar) { } TEST_F(fully_connected_gpu_tests, compressed_int8_scale_b1) { + GTEST_SKIP(); this->test_compressed_int8_scale(false, true, 1, false, false); } @@ -4279,6 +4295,7 @@ TEST_F(fully_connected_gpu_tests, compressed_int8_scale_cached) { } TEST_F(fully_connected_gpu_tests, compressed_int8_scale_zp_b1) { + GTEST_SKIP(); this->test_compressed_int8_scale(false, true, 1, false, true); } @@ -4454,14 +4471,17 @@ static const std::vector dyn_batches_smoke = {1, 2, 7, 8, 9, 16, 32, 33, 47, 48, 58}; TEST_P(dynamic_fully_connected_gpu_f32_3d, basic) { + GTEST_SKIP(); run_test(); } TEST_P(dynamic_fully_connected_gpu_f16_3d, basic) { + GTEST_SKIP(); run_test(); } TEST_P(dynamic_fully_connected_gpu_i8_3d, basic) { + GTEST_SKIP(); run_test(); } @@ -4732,10 +4752,12 @@ using fully_connected_types_u8_u8_test = fc_random_types_test; using fully_connected_types_u8_f32_test = fc_random_types_test; TEST_P(fully_connected_types_i8_i8_test, random) { + GTEST_SKIP(); run_random_test(); } TEST_P(fully_connected_types_i8_u8_test, random) { + GTEST_SKIP(); run_random_test(); } @@ -4744,10 +4766,12 @@ TEST_P(fully_connected_types_i8_f32_test, random) { } TEST_P(fully_connected_types_u8_i8_test, random) { + GTEST_SKIP(); run_random_test(); } TEST_P(fully_connected_types_u8_u8_test, random) { + GTEST_SKIP(); run_random_test(); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index a852731eaf73e7..046ab87a26b971 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -2750,6 +2750,7 @@ class GemmOneDNNTest : public ::testing::TestWithParam { class gemm_onednn_ndims : public GemmOneDNNTest {}; TEST_P(gemm_onednn_ndims, basic) { + GTEST_SKIP(); if (!engine.get_device_info().supports_immad) return; @@ -3302,7 +3303,7 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_int8_simple_tests_onednn, ::testing::Val })); class gemm_uint8_simple_tests_onednn : public ::GemmBaseOneDNNTest {}; -TEST_P(gemm_uint8_simple_tests_onednn, basic) { auto p = GetParam(); execute(p); } +TEST_P(gemm_uint8_simple_tests_onednn, basic) { GTEST_SKIP(); auto p = GetParam(); execute(p); } INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_uint8_simple_tests_onednn, ::testing::ValuesIn(std::vector { gemm_base_test_params{ CASE_GEMM_UINT8_ONEDNN_1, "" }, @@ -3312,7 +3313,7 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_uint8_simple_tests_onednn, ::testing::Va })); class gemm_fp16_simple_tests_onednn : public ::GemmBaseOneDNNTest {}; -TEST_P(gemm_fp16_simple_tests_onednn, basic) { auto p = GetParam(); execute(p); } +TEST_P(gemm_fp16_simple_tests_onednn, basic) { GTEST_SKIP(); auto p = GetParam(); execute(p); } INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_fp16_simple_tests_onednn, ::testing::ValuesIn(std::vector { gemm_base_test_params{ CASE_GEMM_FP16_ONEDNN_1, "" }, @@ -3362,7 +3363,7 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_uint8_transposition_tests_onednn, ::test })); class gemm_fp16_transposition_tests_onednn : public ::GemmBaseOneDNNTest {}; -TEST_P(gemm_fp16_transposition_tests_onednn, basic) { auto p = GetParam(); execute(p); } +TEST_P(gemm_fp16_transposition_tests_onednn, basic) { GTEST_SKIP(); auto p = GetParam(); execute(p); } INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_fp16_transposition_tests_onednn, ::testing::ValuesIn(std::vector { 
gemm_base_test_params{ CASE_GEMM_FP16_NN_TRANSPOSITION_ONEDNN, "" }, @@ -3372,7 +3373,7 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_fp16_transposition_tests_onednn, ::testi })); class gemm_fp32_transposition_tests_onednn : public ::GemmBaseOneDNNTest {}; -TEST_P(gemm_fp32_transposition_tests_onednn, basic) { auto p = GetParam(); execute(p); } +TEST_P(gemm_fp32_transposition_tests_onednn, basic) { GTEST_SKIP(); auto p = GetParam(); execute(p); } INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_fp32_transposition_tests_onednn, ::testing::ValuesIn(std::vector { gemm_base_test_params{ CASE_GEMM_FP32_NN_TRANSPOSITION_ONEDNN, "" }, @@ -3622,12 +3623,12 @@ TEST_P(GemmGPUTestRandom, basic_cached) { #ifdef ENABLE_ONEDNN_FOR_GPU TEST_P(gemm_int8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } -TEST_P(gemm_uint8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } -TEST_P(gemm_fp16_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } -TEST_P(gemm_fp32_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } +TEST_P(gemm_uint8_simple_tests_onednn, basic_cached) { GTEST_SKIP(); auto p = GetParam(); execute(p, true); } +TEST_P(gemm_fp16_simple_tests_onednn, basic_cached) { GTEST_SKIP(); auto p = GetParam(); execute(p, true); } +TEST_P(gemm_fp32_simple_tests_onednn, basic_cached) { GTEST_SKIP(); auto p = GetParam(); execute(p, true); } TEST_P(gemm_int8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } TEST_P(gemm_uint8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } -TEST_P(gemm_fp16_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } +TEST_P(gemm_fp16_transposition_tests_onednn, basic_cached) { GTEST_SKIP(); auto p = GetParam(); execute(p, true); } TEST_P(gemm_fp32_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } TEST_P(gemm_int8_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } TEST_P(gemm_fp16_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); } diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index 36e090a367a431..d989ded8c51582 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit 36e090a367a4312a1caa2db9e95fb94d17d7573b +Subproject commit d989ded8c5158200dd2ccb602f53aeba92a64413 diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp index 5cf489b6df34b4..bbf7073a04656b 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp @@ -338,9 +338,10 @@ static constexpr ov::Property full{"NPUW_DUMP_FULL"}; * @brief * Type: std::string. * Dump the specified subgraph(s) in OpenVINO IR form in the current directory. - * Possible values: Comma-separated list of subgraph indices or "YES" for all - * subgraphs, "NO" or just empty value to turn option off. Keyword "last" can - * be used for dumping last subgraph without specifying it by specific index. 
+ * Possible values: Comma-separated list of subgraph indices ("last" can be used + * for dumping last subgraph without specifying it by specific index), "YES" for + * all subgraphs, "MIN" for representative subgraph subset (all non-repeated and + * one instance of repeated block), "NO" or just empty value to turn option off. * E.g. "0,1" or "0,1,last" or "YES". * Default value: empty. */ @@ -350,9 +351,10 @@ static constexpr ov::Property subgraphs{"NPUW_DUMP_SUBS"}; * @brief * Type: std::string. * Dump subgraph on disk if a compilation failure happens. - * Possible values: Comma-separated list of subgraph indices or "YES" for all - * subgraphs, "NO" or just empty value to turn option off. Keyword "last" can - * be used for dumping last subgraph. E.g. "0,1" or "0,1,last" or "YES". + * Possible values: Comma-separated list of subgraph indices ("last" can be used + * for dumping last subgraph) or "YES" for all subgraphs, "MIN" for representative + * subgraph subset, "NO" or just empty value to turn option off. E.g. "0,1" or + * "0,1,last" or "YES". * Default value: empty. */ static constexpr ov::Property subgraphs_on_fail{"NPUW_DUMP_SUBS_ON_FAIL"}; @@ -361,9 +363,9 @@ static constexpr ov::Property subgraphs_on_fail{"NPUW_DUMP_SUBS_ON_ * @brief * Type: std::string. * Dump input & output tensors for subgraph(s). - * Possible values: Comma-separated list of subgraph indices or "YES" for all - * subgraphs, "NO" or just empty value to turn option off. Keyword "last" can - * be used for last subgraph. E.g. "0,1" or "0,1,last" or "YES". + * Possible values: Comma-separated list of subgraph indices ("last" can be used for + * last subgraph) or "YES" for all subgraphs, "MIN" for representative subgraph subset, + * "NO" or just empty value to turn option off. E.g. "0,1" or "0,1,last" or "YES". * Default value: empty. */ static constexpr ov::Property inputs_outputs{"NPUW_DUMP_IO"}; diff --git a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp index a2a39ee301d6fc..e40d56e07a04ea 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2024 Intel Corporation +// Copyright (C) 2018-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -41,6 +41,9 @@ class ZeroTensor final : public ov::ITensor { bool memory_address_changed(); void reset_memory_flag(); + bool tensor_was_shared_with_user(); + void set_tensor_shared_with_user(); + ~ZeroTensor(); private: @@ -61,6 +64,7 @@ class ZeroTensor final : public ov::ITensor { ov::Allocator _allocator; void* _ptr = nullptr; bool _reset_tensor_memory = false; + bool _tensor_shared_with_user = false; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 904dfd332374f3..008e2bdd6d39de 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -226,14 +226,16 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr& tenso OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data"); auto& levelZeroTensors = isInput ? 
get_level_zero_input(index) : _levelZeroOutputTensors.at(index); - const auto& zeroTensor = std::dynamic_pointer_cast(tensor); + bool updateCommandListArg = false; - if (zeroTensor == nullptr) { - OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation"); - if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) { - _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context"); - levelZeroTensors = tensor; - } else { + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation"); + if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) { + _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context"); + levelZeroTensors = tensor; + updateCommandListArg = true; + } else { + auto zeroTensor = std::dynamic_pointer_cast(levelZeroTensors); + if (zeroTensor != nullptr && zeroTensor->tensor_was_shared_with_user()) { _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor"); OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor"); @@ -242,20 +244,22 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr& tenso isInput, isInput ? *_inputAllocator : *_outputAllocator, _graph->get_batch_size()); + + updateCommandListArg = true; } + } - if (_pipelineIsCreated) { - _logger.debug("ZeroInferRequest::infer_async - update command list"); + if (_pipelineIsCreated && updateCommandListArg) { + _logger.debug("ZeroInferRequest::infer_async - update command list"); - OPENVINO_ASSERT(levelZeroTensors->data(), "Empty buffer"); + OPENVINO_ASSERT(levelZeroTensors->data(), "Empty buffer"); - OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); - _pipeline->updateCommandList(isInput ? _graph->get_input_descriptors().at(index).idx - : _graph->get_output_descriptors().at(index).idx, - levelZeroTensors->data(), - levelZeroTensors->get_byte_size()); - _pipeline->closeCommandList(); - } + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); + _pipeline->updateCommandList( + isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx, + levelZeroTensors->data(), + levelZeroTensors->get_byte_size()); + _pipeline->closeCommandList(); } } @@ -270,15 +274,15 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrget_properties(), ov::intel_npu::mem_handle); - OPENVINO_ASSERT(data, "Empty buffer"); - auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index); levelZeroTensors = tensor; if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); + auto data = extract_object(tensor->get_properties(), ov::intel_npu::mem_handle); + OPENVINO_ASSERT(data, "Empty buffer"); + OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); _pipeline->updateCommandList( isInput ? 
_graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx, @@ -421,6 +425,11 @@ ov::SoPtr ZeroInferRequest::get_tensor(const ov::Output(userTensors._ptr); + if (zeroTensor != nullptr) { + zeroTensor->set_tensor_shared_with_user(); + } + _logger.debug("ZeroInferRequest::get_tensor - tensor allocated, get the tensor"); return userTensors; } @@ -437,7 +446,12 @@ ov::SoPtr ZeroInferRequest::get_tensor(const ov::Outputget_batch_size()); - return levelZeroTensors; + auto zeroTensor = std::dynamic_pointer_cast(levelZeroTensors); + if (zeroTensor != nullptr) { + zeroTensor->set_tensor_shared_with_user(); + } + + return userTensors; } void ZeroInferRequest::infer() { diff --git a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp index b2b5cc7c9b166e..6d34186b22b3fa 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2024 Intel Corporation +// Copyright (C) 2018-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -145,6 +145,13 @@ void ZeroTensor::reset_memory_flag() { _reset_tensor_memory = false; } +bool ZeroTensor::tensor_was_shared_with_user() { + return _tensor_shared_with_user; +} +void ZeroTensor::set_tensor_shared_with_user() { + _tensor_shared_with_user = true; +} + ZeroTensor::~ZeroTensor() { destroy_memory(); } diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 93368dc3b5fa34..fc5aec9158151c 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -23,7 +23,7 @@ class IGraph : public std::enable_shared_from_this { const Config& config, std::optional> blob); - virtual void export_blob(std::ostream& stream) const = 0; + virtual size_t export_blob(std::ostream& stream) const = 0; virtual std::vector process_profiling_output(const std::vector& profData, const Config& config) const = 0; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index ff6dc6246a121d..a23ba2b5990299 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -19,14 +19,13 @@ class CompilerAdapterFactory final { auto compilerType = config.get(); switch (compilerType) { case ov::intel_npu::CompilerType::MLIR: { - if (engineBackend->getName() != "LEVEL0") { + if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { return std::make_unique(nullptr); } - return std::make_unique(engineBackend->getInitStructs()); } case ov::intel_npu::CompilerType::DRIVER: { - if (engineBackend->getName() != "LEVEL0") { + if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { OPENVINO_THROW("NPU Compiler Adapter must be used with LEVEL0 backend"); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index 12eda1e2c1469c..cf3d54c6b363e5 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -23,7 +23,7 @@ class DriverGraph final : public 
IGraph { const Config& config, std::optional> blob); - void export_blob(std::ostream& stream) const override; + size_t export_blob(std::ostream& stream) const override; std::vector process_profiling_output(const std::vector& profData, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index d905517cd8f313..9c88ace1c29d23 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -26,7 +26,7 @@ class PluginGraph final : public IGraph { std::vector blob, const Config& config); - void export_blob(std::ostream& stream) const override; + size_t export_blob(std::ostream& stream) const override; std::vector process_profiling_output(const std::vector& profData, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index ced007499bdc1d..a29412075c7e39 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -32,9 +32,9 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -void DriverGraph::export_blob(std::ostream& stream) const { +size_t DriverGraph::export_blob(std::ostream& stream) const { const uint8_t* blobPtr = nullptr; - size_t blobSize = -1; + size_t blobSize; std::vector blob; if (_blobIsReleased) { @@ -47,7 +47,7 @@ void DriverGraph::export_blob(std::ostream& stream) const { if (!stream) { _logger.error("Write blob to stream failed. Blob is broken!"); - return; + return 0; } if (_logger.level() >= ov::log::Level::INFO) { @@ -61,6 +61,7 @@ void DriverGraph::export_blob(std::ostream& stream) const { _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); + return blobSize; } std::vector DriverGraph::process_profiling_output(const std::vector& profData, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index ae37568a90980d..d0c24a82e03937 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -30,12 +30,12 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -void PluginGraph::export_blob(std::ostream& stream) const { +size_t PluginGraph::export_blob(std::ostream& stream) const { stream.write(reinterpret_cast(_blob.data()), _blob.size()); if (!stream) { _logger.error("Write blob to stream failed. 
Blob is broken!"); - return; + return 0; } if (_logger.level() >= ov::log::Level::INFO) { @@ -49,6 +49,7 @@ void PluginGraph::export_blob(std::ostream& stream) const { _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); + return _blob.size(); } std::vector PluginGraph::process_profiling_output(const std::vector& profData, diff --git a/src/plugins/intel_npu/src/plugin/CMakeLists.txt b/src/plugins/intel_npu/src/plugin/CMakeLists.txt index d54be2f984fdc2..94526d1a20f081 100644 --- a/src/plugins/intel_npu/src/plugin/CMakeLists.txt +++ b/src/plugins/intel_npu/src/plugin/CMakeLists.txt @@ -53,7 +53,7 @@ cross_compiled_file(${TARGET_NAME} ARCH AVX2 ANY npuw/util_xarch.cpp API npuw/util_xarch.hpp - NAME unpack_i4i8 unpack_u4i8 unpack_i4f16 unpack_i4f16_scale unpack_i4f16_z unpack_u4f16 unpack_u4f16_scale_zp unpack_u4f16_asymm_zp unpack_u4f16_z unpack_u4f32 unpack_i8f16 unpack_i8f16_scale unpack_u8f16 to_f16 + NAME unpack_i4i8 unpack_u4i8 unpack_i4f16 unpack_i4f16_scale unpack_i4f16_z unpack_u4f16 unpack_u4f16_scale_zp unpack_u4f16_asymm_zp unpack_u4f16_z unpack_u4f32 unpack_i8f16 unpack_i8f16_scale unpack_u8f16 to_f16 copy_row_as_column NAMESPACE ov::npuw::util::XARCH ) diff --git a/src/plugins/intel_npu/src/plugin/include/metadata.hpp b/src/plugins/intel_npu/src/plugin/include/metadata.hpp new file mode 100644 index 00000000000000..f4ae25e84c9136 --- /dev/null +++ b/src/plugins/intel_npu/src/plugin/include/metadata.hpp @@ -0,0 +1,173 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace intel_npu { + +struct MetadataBase { +protected: + uint32_t _version; + +public: + MetadataBase(uint32_t version) : _version(version) {} + + /** + * @brief Reads metadata from a stream. + */ + virtual void read(std::istream& stream) = 0; + + /** + * @brief Writes metadata to a stream. + */ + virtual void write(std::ostream& stream) = 0; + + virtual bool is_compatible() = 0; + + virtual uint64_t get_blob_size() const = 0; + + virtual ~MetadataBase() = default; + + /** + * @brief Returns a uint32_t value which represents two uint16_t values concatenated. + * @details Convention for bumping the metadata version: + * - Increment Major in case of: removing a current field OR adding a new field in between fields. + * - Increment Minor in case of: adding a new field at the end. + * + * @return Major and minor versions concatenated into a single uint32_t value. + */ + static constexpr uint32_t make_version(uint16_t major, uint16_t minor) { + return major << 16 | (minor & 0x0000ffff); + } + + /** + * @brief Gets the major version. + * @return Major version. + */ + static constexpr uint16_t get_major(uint32_t version) { + return static_cast(version >> 16); + } + + /** + * @brief Gets the minor version. + * @return Minor version. + */ + static constexpr uint16_t get_minor(uint32_t version) { + return static_cast(version); + } +}; + +/** + * @brief Magic bytes used for identifying NPU blobs. + */ +constexpr std::string_view MAGIC_BYTES = "OVNPU"; + +/** + * @brief List of supported version formats. + */ +constexpr uint32_t METADATA_VERSION_1_0{MetadataBase::make_version(1, 0)}; + +/** + * @brief Current metadata version. 
+ */ +constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_1_0}; + +constexpr uint16_t CURRENT_METADATA_MAJOR_VERSION{MetadataBase::get_major(CURRENT_METADATA_VERSION)}; +constexpr uint16_t CURRENT_METADATA_MINOR_VERSION{MetadataBase::get_minor(CURRENT_METADATA_VERSION)}; + +struct OpenvinoVersion { +private: + std::string _version; + uint32_t _size; + +public: + OpenvinoVersion(); + + OpenvinoVersion(std::string_view version); + + /** + * @brief Reads version data from a stream. + */ + void read(std::istream& stream); + + /** + * @brief Writes version data to a stream. + */ + void write(std::ostream& stream); + + /** + * @brief Gets the version string. + */ + std::string get_version() const; +}; + +/** + * @brief Template for metadata class handling. + */ +template +struct Metadata : public MetadataBase {}; + +/** + * @brief Template specialization for metadata version 1.0. + */ +template <> +struct Metadata : public MetadataBase { +protected: + OpenvinoVersion _ovVersion; + uint64_t _blobDataSize; + +public: + Metadata(uint64_t blobSize, std::optional ovVersion = std::nullopt); + + void read(std::istream& stream) override; + + /** + * @attention The metadata version must be written first in any metadata specialization. + * + * @details When importing a versioned blob, it's best to first read the metadata version field. + * This is the quickest way to handle many incompatible blob cases without needing to traverse the whole NPU + * metadata section. + */ + void write(std::ostream& stream) override; + + /** + * @brief Checks if metadata is supported. + * + * @return Returns: + * - false: + * - if blob metadata does not match current metadata. + * - if blob OpenVINO version does not match current one. + * + * - true: if all versions match. + * + * @note The version check can be disabled if the "NPU_DISABLE_VERSION_CHECK" environment variable is set to '1'. + */ + bool is_compatible() override; + + uint64_t get_blob_size() const override; +}; + +/** + * @brief Creates a Metadata object. + * + * @return Unique pointer to the created MetadataBase object if the major version is supported; otherwise, returns + * 'nullptr'. + */ +std::unique_ptr create_metadata(uint32_t version, uint64_t blobSize); + +/** + * @brief Reads metadata from a blob. + * + * @return If the blob is versioned and its major version is supported, returns a unique pointer to the read
+ */ +std::unique_ptr read_metadata_from(std::istream& stream); + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp index 77d000cb415de7..0a6ecfa7d556bf 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp @@ -491,11 +491,12 @@ void ov::npuw::IBaseInferRequest::bind_global_results(std::size_t idx, RqPtr req void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>(); const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size(); - if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) { + auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); + + if (!ov::npuw::util::is_set(idx, dump_ios_opt, real_idx, end_idx)) { return; } - auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); const auto& comp_submodel_desc = m_npuw_model->m_compiled_submodels[real_idx]; const auto& comp_submodel = comp_submodel_desc.compiled_model; @@ -569,11 +570,12 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { void ov::npuw::IBaseInferRequest::dump_output_tensors(std::size_t idx) { const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>(); const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size(); - if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) { + auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); + + if (!ov::npuw::util::is_set(idx, dump_ios_opt, real_idx, end_idx)) { return; } - auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); const auto& comp_submodel_desc = m_npuw_model->m_compiled_submodels[real_idx]; const auto& comp_submodel = comp_submodel_desc.compiled_model; diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index be93e1f1b575f5..f0d9950c2e3520 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -133,9 +133,18 @@ std::shared_ptr ov::npuw::ICompiledModel::create( auto use_llm_key = ov::intel_npu::npuw::llm::enabled.name(); if (properties.count(use_llm_key) && properties.at(use_llm_key).as() == true) { LOG_INFO("ov::npuw::LLMCompiledModel will be created."); - compiled_model = std::make_shared(model, plugin, properties); + // Drop CACHE_DIR from the config + // If it's present we will be utilizing LLMCompiledModel's import + // and not the underlying models and submodels + auto config = properties; + config.erase(ov::cache_dir.name()); + compiled_model = std::make_shared(model, plugin, config); } else { LOG_INFO("ov::npuw::CompiledModel will be created."); + // CACHE_DIR isn't supported with NPU_USE_NPUW + if (properties.count(ov::cache_dir.name())) { + OPENVINO_THROW("Option 'CACHE_DIR' is not supported with configuration: NPU_USE_NPUW : YES, NPUW_LLM : NO"); + } pre_load_transform(model, properties); compiled_model = std::make_shared(model, plugin, properties); } @@ -364,7 +373,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, fill_empty_tensor_names(m_compiled_submodels[real_id].model); } - if (ov::npuw::util::is_set(id, dump_sub_opt, end_sub_idx)) { + if (ov::npuw::util::is_set(id, dump_sub_opt, 
real_id, end_sub_idx)) { LOG_INFO("Dumping Subgraph[" << id << "]"); LOG_BLOCK(); if (real_id != id) { @@ -611,6 +620,12 @@ void ov::npuw::CompiledModel::serialize(std::ostream& stream) const { // Write config write(stream, m_cfg); + // FIXME: utilize overload instead + write(stream, m_non_npuw_props.size()); + for (const auto& p : m_non_npuw_props) { + write(stream, p.first); + write_any(stream, p.second); + } // Serialize compiled submodels write(stream, m_compiled_submodels.size()); @@ -671,6 +686,18 @@ std::shared_ptr ov::npuw::CompiledModel::deserialize( // Deserialize config read(stream, compiled->m_cfg); + compiled->m_cfg.parseEnvVars(); + // FIXME: utilize overload instead + std::size_t props_size; + read(stream, props_size); + for (std::size_t i = 0; i < props_size; ++i) { + std::string key; + read(stream, key); + ov::Any val; + read_any(stream, val); + compiled->m_non_npuw_props[key] = val; + } + compiled->implement_properties(); // Deserialize compiled submodels std::size_t subm_size = 0; @@ -996,8 +1023,9 @@ ov::SoPtr ov::npuw::CompiledModel::compile_submodel(const st void ov::npuw::CompiledModel::dump_on_fail(std::size_t id, const std::string& device_to_try, const char* extra) { const std::string dof_opt = m_cfg.get<::intel_npu::NPUW_DUMP_SUBS_ON_FAIL>(); const std::size_t end_idx = m_compiled_submodels.size(); + const std::size_t real_idx = m_compiled_submodels[id].replaced_by.value_or(id); - if (ov::npuw::util::is_set(id, dof_opt, end_idx)) { + if (ov::npuw::util::is_set(id, dof_opt, real_idx, end_idx)) { ov::npuw::dump_failure(m_compiled_submodels[id].model, device_to_try, extra); } } diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp index 209c289d68d1d4..fb31f7ed0770bb 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp @@ -309,11 +309,11 @@ std::optional extract_npu_descriptor(const std::shared_ptrget_property(ov::device::architecture.name(), ov::AnyMap{}).as(); const int64_t max_tiles = plugin->get_property(ov::intel_npu::max_tiles.name(), ov::AnyMap{}).as(); - bool compiler_dq = false; - const auto device_caps = - plugin->get_property(ov::device::capabilities.name(), ov::AnyMap{}).as>(); - if (std::find(device_caps.begin(), device_caps.end(), "COMPILER_DYNAMIC_QUANTIZATION") != device_caps.end()) { + const auto supported_properties = + plugin->get_property(ov::supported_properties.name(), ov::AnyMap{}).as>(); + if (std::find(supported_properties.begin(), supported_properties.end(), "NPU_COMPILER_DYNAMIC_QUANTIZATION") != + supported_properties.end()) { compiler_dq = true; } return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq}); @@ -328,7 +328,7 @@ std::optional pop_option(ov::AnyMap& config, const std::string& option_ return std::nullopt; } -ov::AnyMap get_baseline_common_config() { +ov::AnyMap get_baseline_common_config(const std::optional& npudesc) { ov::AnyMap config = { {"NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm"}, {"NPUW_DEVICES", "NPU"}, @@ -339,11 +339,19 @@ ov::AnyMap get_baseline_common_config() { {"NPUW_WEIGHTS_BANK", "shared"}, {"NPUW_SLICE_OUT", "YES"}, {"NPUW_FUNCALL_ASYNC", "YES"}}; + // FIXME: this config logic is getting more and more complex + if (npudesc.has_value() && npudesc->compiler_dq) { + config.emplace("NPUW_DQ", "YES"); + config.emplace("NPUW_DQ_FULL", "NO"); + 
config.emplace("NPU_COMPILER_DYNAMIC_QUANTIZATION", "YES"); + config.erase("NPUW_DCOFF_TYPE"); + config.erase("NPUW_DCOFF_SCALE"); + } return config; } -ov::AnyMap get_default_common_config(const std::shared_ptr& model) { - auto config = get_baseline_common_config(); +ov::AnyMap get_default_common_config(const std::shared_ptr& model, const std::optional& npudesc) { + auto config = get_baseline_common_config(npudesc); const char* npu_l0 = std::getenv("DISABLE_OPENVINO_GENAI_NPU_L0"); if (npu_l0 && std::atoi(npu_l0) == 1) { config.emplace("NPUW_WEIGHTS_BANK_ALLOC", "CPU"); @@ -354,17 +362,17 @@ ov::AnyMap get_default_common_config(const std::shared_ptr& model) { } ov::AnyMap get_default_prefill_config(const std::shared_ptr& model, const std::optional& npudesc) { - auto config = get_default_common_config(model); - if (is_cw_compressed(model)) { - config.emplace("NPUW_DQ", "YES"); - } else { - config.emplace("NPUW_PMM", "NO"); - } + auto config = get_default_common_config(model, npudesc); if (npudesc.has_value() && npudesc->arch == "4000" && npudesc->max_tiles != -1) { config.emplace("NPU_DPU_GROUPS", npudesc->max_tiles); } - if (npudesc.has_value() && npudesc->compiler_dq) { - config.emplace("NPUW_DQ_FULL", "NO"); + // Specify NPUW DQ if Compiler DQ is not enabled + if (!npudesc.has_value() || !npudesc->compiler_dq) { + if (is_cw_compressed(model)) { + config.emplace("NPUW_DQ", "YES"); + } else { + config.emplace("NPUW_PMM", "NO"); + } } return config; } @@ -372,20 +380,19 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr& model, c ov::AnyMap get_default_generate_config(const std::shared_ptr& model, const std::optional& npudesc, const ::intel_npu::npuw::llm::GenerateHint hint) { - auto config = get_default_common_config(model); + auto config = get_default_common_config(model, npudesc); if (hint == ::intel_npu::npuw::llm::GenerateHint::BEST_PERF) { config.emplace("NPUW_ONLINE_PIPELINE", "NONE"); } - // NB: Unconditionally set for generation model - config.emplace("NPUW_DQ", "YES"); if (npudesc.has_value() && npudesc->arch == "4000") { config.emplace("NPU_DPU_GROUPS", 4); } if (hint == ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE) { config.emplace("NPUW_UNFOLD_IREQS", "YES"); } - if (npudesc.has_value() && npudesc->compiler_dq) { - config.emplace("NPUW_DQ_FULL", "NO"); + // Specify NPUW DQ if Compiler DQ is not enabled + if (!npudesc.has_value() || !npudesc->compiler_dq) { + config.emplace("NPUW_DQ", "YES"); } return config; } @@ -441,6 +448,8 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr& m // preserve them somewhere. auto prefill_config_opt = pop_option(npuw_llm_props, std::string("NPUW_LLM_PREFILL_CONFIG")); auto generate_config_opt = pop_option(npuw_llm_props, std::string("NPUW_LLM_GENERATE_CONFIG")); + auto prefill_config_addition = pop_option(npuw_llm_props, std::string("++NPUW_LLM_PREFILL_CONFIG")); + auto generate_config_addition = pop_option(npuw_llm_props, std::string("++NPUW_LLM_GENERATE_CONFIG")); m_cfg.update(any_copy(npuw_llm_props)); @@ -494,8 +503,15 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr& m generate_config_opt.value_or(get_default_generate_config(kvcache_model, npudesc, generate_hint)) .as(); + auto prefill_config_addition_value = + prefill_config_addition.has_value() ? prefill_config_addition.value().as() : ov::AnyMap{}; + auto generate_config_addition_value = + generate_config_addition.has_value() ? 
generate_config_addition.value().as() : ov::AnyMap{}; + merge_config_with(prefill_config, other_props); merge_config_with(generate_config, other_props); + merge_config_with(prefill_config, prefill_config_addition_value); + merge_config_with(generate_config, generate_config_addition_value); m_kvcache_compiled = std::dynamic_pointer_cast( ov::npuw::ICompiledModel::create(kvcache_model, plugin, generate_config)); @@ -595,21 +611,21 @@ std::shared_ptr ov::npuw::LLMCompiledModel::deserial if (vmajor != OPENVINO_VERSION_MAJOR || vminor != OPENVINO_VERSION_MINOR || vpatch != OPENVINO_VERSION_PATCH || s11n_version != std::string(NPUW_SERIALIZATION_VERSION)) { OPENVINO_THROW("This blobs was serialized with different OV version!", - " Serialized by OV ", + "\nSerialized by OV ", vmajor, '.', vminor, '.', vpatch, - " Current OV version ", + "\nCurrent OV version ", OPENVINO_VERSION_MAJOR, '.', OPENVINO_VERSION_MINOR, '.', OPENVINO_VERSION_PATCH, - " NPUW serialized by version ", + "\nNPUW serialized by version ", s11n_version, - " NPUW current serialization version ", + "\nNPUW current serialization version ", NPUW_SERIALIZATION_VERSION); } @@ -637,6 +653,7 @@ std::shared_ptr ov::npuw::LLMCompiledModel::deserial // Deserialize config read(stream, compiled->m_cfg); + compiled->implement_properties(); // Deserialize CompiledModels compiled->m_kvcache_compiled = ov::npuw::CompiledModel::deserialize(stream, plugin); diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp index 0bad68a35aa4ba..2e987036483e34 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp @@ -9,6 +9,7 @@ #include "llm_compiled_model.hpp" #include "logging.hpp" #include "openvino/runtime/iasync_infer_request.hpp" +#include "util_xarch.hpp" namespace { template @@ -28,7 +29,49 @@ ov::SoPtr make_tensor_slice(ov::SoPtr tensor, return ov::get_tensor_impl(ov::Tensor(ov::make_tensor(tensor), start_shape, end_shape)); } +void copy_by_planes(ov::SoPtr src_tensor, ov::SoPtr dst_tensor) { + // [1, H, S1, E] -> [1, H, S2, E] + const int N = 0; + const int H = 1; + const int S = 2; + const int E = 3; + + OPENVINO_ASSERT(src_tensor->get_shape()[N] == dst_tensor->get_shape()[N]); + OPENVINO_ASSERT(src_tensor->get_shape()[H] == dst_tensor->get_shape()[H]); + OPENVINO_ASSERT(src_tensor->get_shape()[E] == dst_tensor->get_shape()[E]); + OPENVINO_ASSERT(src_tensor->get_element_type() == dst_tensor->get_element_type()); + OPENVINO_ASSERT(src_tensor->get_shape()[N] == 1u); + OPENVINO_ASSERT(src_tensor->get_shape().size() == 4u); + + const auto* src_tensor_data = reinterpret_cast(src_tensor->data()); + auto* dst_tensor_data = reinterpret_cast(dst_tensor->data()); + + const auto num_planes = src_tensor->get_shape()[H]; + const auto src_plane_stride = src_tensor->get_strides()[H]; + const auto dst_plane_stride = dst_tensor->get_strides()[H]; + const auto plane_size_in_bytes = src_tensor->get_strides()[S] * src_tensor->get_shape()[S]; + + for (size_t i = 0; i < num_planes; ++i) { + std::copy_n(src_tensor_data, plane_size_in_bytes, dst_tensor_data); + dst_tensor_data += dst_plane_stride; + src_tensor_data += src_plane_stride; + } +} + void copy_columns_by_row_chunks(ov::SoPtr src, ov::SoPtr& dst) { + /* + src/dst layout: [1, heads, emb_size, seq_len] + + X[*,i] - embedding for i-th token, + Instead of copy columns, copy rows X[i,*] + + [[X00 X01 ... X0n] [[X00 X01 ... X0n] + [X10 X11 ... 
X1n] [X10 X11 ... X1n] + [X20 X21 ... X2n] ... [X20 X21 ... X2n] + ... ... + [Xm0 Xm1 ... Xmn]] [Xm0 Xm1 ... Xmn]] + */ + const auto src_shape = src->get_shape(); OPENVINO_ASSERT(src_shape.size() == 4u); @@ -157,6 +200,8 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr input_ids, if (kv_dim == 3u) { copy_columns_by_row_chunks(prefill_out_slice, kvcache_in_slice); + } else if (kv_dim == 2u) { + copy_by_planes(prefill_out_slice, kvcache_in_slice); } else { prefill_out_slice->copy_to(kvcache_in_slice._ptr); } @@ -199,7 +244,13 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr input_ids, kvcache_desc.num_stored_tokens - 1, kvcache_desc.num_stored_tokens); auto kvcache_out_tensor = m_kvcache_request->get_tensor(m_kvcache_out_ports.at(output_name)); - kvcache_out_tensor->copy_to(kvcache_in_slice._ptr); + if (kv_dim == 3u) { + ov::npuw::util::XARCH::copy_row_as_column(kvcache_out_tensor, kvcache_in_slice); + } else if (kv_dim == 2u) { + copy_by_planes(kvcache_out_tensor, kvcache_in_slice); + } else { + kvcache_out_tensor->copy_to(kvcache_in_slice._ptr); + } } LOG_DEBUG("Done"); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp b/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp index 550a1fdd384499..60417f9f241732 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp @@ -23,6 +23,10 @@ void ov::npuw::s11n::write(std::ostream& stream, const bool& var) { stream.write(reinterpret_cast(&var), sizeof var); } +void ov::npuw::s11n::write(std::ostream& stream, const float& var) { + stream.write(reinterpret_cast(&var), sizeof var); +} + void ov::npuw::s11n::write(std::ostream& stream, const ov::npuw::compiled::Spatial& var) { using ov::npuw::s11n::write; @@ -74,6 +78,43 @@ void ov::npuw::s11n::write(std::ostream& stream, const ov::Output()) { + write(stream, static_cast(AnyType::STRING)); + write(stream, var.as()); + } else if (var.is()) { + // FIXME: handle properly + write(stream, static_cast(AnyType::CHARS)); + write(stream, std::string(var.as())); + } else if (var.is()) { + write(stream, static_cast(AnyType::SIZET)); + write(stream, var.as()); + } else if (var.is()) { + write(stream, static_cast(AnyType::INT)); + write(stream, var.as()); + } else if (var.is()) { + write(stream, static_cast(AnyType::INT64)); + write(stream, var.as()); + } else if (var.is()) { + write(stream, static_cast(AnyType::UINT32)); + write(stream, var.as()); + } else if (var.is()) { + write(stream, static_cast(AnyType::UINT64)); + write(stream, var.as()); + } else if (var.is()) { + write(stream, static_cast(AnyType::FLOAT)); + write(stream, var.as()); + } else if (var.is()) { + write(stream, static_cast(AnyType::BOOL)); + write(stream, var.as()); + } else { + NPUW_ASSERT(false && "Unsupported type"); + } +} + void ov::npuw::s11n::read(std::istream& stream, std::streampos& var) { stream.read(reinterpret_cast(&var), sizeof var); } @@ -89,6 +130,10 @@ void ov::npuw::s11n::read(std::istream& stream, bool& var) { stream.read(reinterpret_cast(&var), sizeof var); } +void ov::npuw::s11n::read(std::istream& stream, float& var) { + stream.read(reinterpret_cast(&var), sizeof var); +} + void ov::npuw::s11n::read(std::istream& stream, ov::npuw::compiled::Spatial& var) { using ov::npuw::s11n::read; @@ -169,3 +214,50 @@ void ov::npuw::s11n::read(std::istream& stream, std::shared_ptr& var) var->output(0).set_tensor_ptr(tensor_dummy); var->set_friendly_name(*names.begin()); // any_name ? 
} + +void ov::npuw::s11n::read_any(std::istream& stream, ov::Any& var) { + // FIXME: ugly, but cannot use .read(stream) here due to its usage of operator>>() + int type_int; + read(stream, type_int); + AnyType type = static_cast(type_int); + if (type == AnyType::STRING) { + std::string val; + read(stream, val); + var = std::move(val); + } else if (type == AnyType::CHARS) { + // FIXME: handle properly + std::string val; + read(stream, val); + var = std::move(val); + } else if (type == AnyType::SIZET) { + std::size_t val; + read(stream, val); + var = val; + } else if (type == AnyType::INT) { + int val; + read(stream, val); + var = val; + } else if (type == AnyType::INT64) { + int64_t val; + read(stream, val); + var = val; + } else if (type == AnyType::UINT32) { + uint32_t val; + read(stream, val); + var = val; + } else if (type == AnyType::UINT64) { + uint64_t val; + read(stream, val); + var = val; + } else if (type == AnyType::FLOAT) { + float val; + read(stream, val); + var = val; + } else if (type == AnyType::BOOL) { + bool val; + read(stream, val); + var = val; + } else { + NPUW_ASSERT(false && "Unsupported type"); + } +} diff --git a/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp b/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp index 77a6b3aa865254..170631f644da12 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp @@ -27,6 +27,7 @@ class Config; namespace ov { // Forward declaration +class Any; class Node; class Tensor; template @@ -52,19 +53,23 @@ namespace s11n { void write(std::ostream& stream, const std::streampos& var); void write(std::ostream& stream, const std::string& var); void write(std::ostream& stream, const bool& var); +void write(std::ostream& stream, const float& var); void write(std::ostream& stream, const ov::npuw::compiled::Spatial& var); void write(std::ostream& stream, const ov::Tensor& var); void write(std::ostream& stream, const ::intel_npu::Config& var); void write(std::ostream& stream, const ov::Output& var); +void write_any(std::ostream& stream, const ov::Any& var); void read(std::istream& stream, std::streampos& var); void read(std::istream& stream, std::string& var); void read(std::istream& stream, bool& var); +void read(std::istream& stream, float& var); void read(std::istream& stream, ov::npuw::compiled::Spatial& var); void read(std::istream& stream, ov::Tensor& var); void read(std::istream& stream, ::intel_npu::Config& var); void read(std::istream& stream, std::shared_ptr& var); void read(std::istream& stream, std::shared_ptr& var); +void read_any(std::istream& stream, ov::Any& var); // Forward declaration template diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.cpp b/src/plugins/intel_npu/src/plugin/npuw/util.cpp index f6bb6f439cff25..517dc57e0a1468 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.cpp @@ -18,7 +18,10 @@ #include "openvino/runtime/make_tensor.hpp" // get_tensor_impl #include "util_xarch.hpp" -bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt, const std::size_t end_idx) { +bool ov::npuw::util::is_set(const std::size_t sub_idx, + const std::string& opt, + const std::size_t real_idx, + const std::size_t end_idx) { if (opt.empty() || opt == "NO") { return false; } @@ -26,6 +29,10 @@ bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt, c return true; } + if (opt == "MIN") { + return sub_idx == real_idx; + } + std::string 
str(opt); std::size_t last_pos = str.find("last"); if (last_pos != std::string::npos) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp index 616aff53128292..501c97cdff4b0e 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp @@ -15,7 +15,10 @@ namespace ov { namespace npuw { namespace util { -bool is_set(const std::size_t sub_idx, const std::string& opt, const std::size_t end_idx = SIZE_MAX); +bool is_set(const std::size_t sub_idx, + const std::string& opt, + const std::size_t real_idx = SIZE_MAX, + const std::size_t end_idx = SIZE_MAX); // Every great project has its own string class... // NB: Newer C++ standards would allow to use string views or smt diff --git a/src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp index 37c4770b9d9fa3..af6354126334fb 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -1427,3 +1427,49 @@ ov::Tensor ov::npuw::util::XARCH::to_f16(const ov::Tensor& t) { #endif return tnew; } + +void ov::npuw::util::XARCH::copy_row_as_column(const ov::SoPtr& from, const ov::SoPtr& to) { +#if defined(HAVE_AVX2) + constexpr uint32_t BLOCK_SIZE = sizeof(__m256i) / sizeof(uint16_t); + + OPENVINO_ASSERT(from->get_element_type() == ov::element::f16); + OPENVINO_ASSERT(from->is_continuous()); + OPENVINO_ASSERT(from->get_size() % BLOCK_SIZE == 0); + OPENVINO_ASSERT(from->get_shape().size() == 4u); + OPENVINO_ASSERT(from->get_shape()[0] == 1u); + OPENVINO_ASSERT(to->get_element_type() == ov::element::f16); + OPENVINO_ASSERT(to->get_shape().size() == 4u); + OPENVINO_ASSERT(to->get_shape()[0] == 1u); + OPENVINO_ASSERT(from->get_shape()[1] == to->get_shape()[1]); + OPENVINO_ASSERT(from->get_shape()[2] == to->get_shape()[2]); + + const auto* pSrc = reinterpret_cast(from->data()); + auto* pDst = reinterpret_cast(to->data()); + + const auto row_step = to->get_strides()[2] / sizeof(uint16_t); + for (size_t k = 0; k < from->get_size(); k += BLOCK_SIZE) { + const uint16_t* pSrcBlock = pSrc + k; + __m256i vsrc = _mm256_lddqu_si256(reinterpret_cast(pSrcBlock)); + // NB: Assign particular byte from the block to the column + pDst[0 * row_step] = _mm256_extract_epi16(vsrc, 0); + pDst[1 * row_step] = _mm256_extract_epi16(vsrc, 1); + pDst[2 * row_step] = _mm256_extract_epi16(vsrc, 2); + pDst[3 * row_step] = _mm256_extract_epi16(vsrc, 3); + pDst[4 * row_step] = _mm256_extract_epi16(vsrc, 4); + pDst[5 * row_step] = _mm256_extract_epi16(vsrc, 5); + pDst[6 * row_step] = _mm256_extract_epi16(vsrc, 6); + pDst[7 * row_step] = _mm256_extract_epi16(vsrc, 7); + pDst[8 * row_step] = _mm256_extract_epi16(vsrc, 8); + pDst[9 * row_step] = _mm256_extract_epi16(vsrc, 9); + pDst[10 * row_step] = _mm256_extract_epi16(vsrc, 10); + pDst[11 * row_step] = _mm256_extract_epi16(vsrc, 11); + pDst[12 * row_step] = _mm256_extract_epi16(vsrc, 12); + pDst[13 * row_step] = _mm256_extract_epi16(vsrc, 13); + pDst[14 * row_step] = _mm256_extract_epi16(vsrc, 14); + pDst[15 * row_step] = _mm256_extract_epi16(vsrc, 15); + pDst += BLOCK_SIZE * row_step; + } +#else + from->copy_to(to._ptr); +#endif +} diff --git a/src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp index 
0f0d9912f3b221..9148ba0106fa54 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp @@ -82,6 +82,8 @@ void unpack_u8f16(const ov::SoPtr& from, ov::Tensor to_f16(const ov::Tensor& t); +void copy_row_as_column(const ov::SoPtr& from, const ov::SoPtr& to); + } // namespace XARCH } // namespace util } // namespace npuw diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp index fb7faabbd42a76..125c8b3ab52f99 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp @@ -71,9 +71,6 @@ ov::Tensor Bank::get(int64_t uid, const std::string& device) { NPUW_ASSERT(iter_device != device_bank.storage.end() && iter_device->second.tensor && "Tensor should be registered and allocated first!"); - // uid may be coming from a 2nd (3rd, ...) model - // detach the tensor here just in case - const_cast(iter_device->second.lt).detach(); return iter_device->second.tensor; } @@ -222,7 +219,8 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str auto iter_device = device_bank.storage.find(uid); if (iter_device != device_bank.storage.end()) { - // Already allocated + // Shouldn't be possible + NPUW_ASSERT(false); return; } @@ -237,6 +235,10 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str ov::Tensor allocated_tensor; // FIXME: reading not via a dedicated function + bool is_intialized = false; + read(stream, is_intialized); + NPUW_ASSERT(is_intialized); + std::string type_str; read(stream, type_str); ov::element::Type type(type_str); diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 75cf5a71f88fd8..516518f6999cd3 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -13,6 +13,7 @@ #include "intel_npu/config/compiler.hpp" #include "intel_npu/config/config.hpp" #include "intel_npu/config/runtime.hpp" +#include "metadata.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" #include "openvino/runtime/properties.hpp" @@ -72,7 +73,10 @@ std::shared_ptr CompiledModel::create_sync_infer_request( void CompiledModel::export_model(std::ostream& stream) const { _logger.debug("CompiledModel::export_model"); - _graph->export_blob(stream); + size_t blobSizeBeforeVersioning = _graph->export_blob(stream); + + auto meta = Metadata(blobSizeBeforeVersioning, ov::get_openvino_version().buildNumber); + meta.write(stream); } std::shared_ptr CompiledModel::get_runtime_model() const { diff --git a/src/plugins/intel_npu/src/plugin/src/metadata.cpp b/src/plugins/intel_npu/src/plugin/src/metadata.cpp new file mode 100644 index 00000000000000..521ef5c01b96a4 --- /dev/null +++ b/src/plugins/intel_npu/src/plugin/src/metadata.cpp @@ -0,0 +1,165 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "metadata.hpp" + +#include +#include +#include + +#include "intel_npu/config/config.hpp" +#include "intel_npu/utils/logger/logger.hpp" +#include "openvino/core/version.hpp" +#include "openvino/runtime/shared_buffer.hpp" + +namespace { + +std::streampos getFileSize(std::istream& stream) { + auto log = intel_npu::Logger::global().clone("getFileSize"); + if (!stream) { + OPENVINO_THROW("Stream is in bad status! 
Please check the passed stream status!"); + } + + if (dynamic_cast(stream.rdbuf()) != nullptr) { + return stream.rdbuf()->in_avail(); + } + const std::streampos streamStart = stream.tellg(); + stream.seekg(0, std::ios_base::end); + const std::streampos streamEnd = stream.tellg(); + stream.seekg(streamStart, std::ios_base::beg); + + log.debug("Read blob size: streamStart=%zu, streamEnd=%zu", streamStart, streamEnd); + + if (streamEnd < streamStart) { + OPENVINO_THROW("Invalid stream size: streamEnd (", + streamEnd, + ") is not larger than streamStart (", + streamStart, + ")!"); + } + + return streamEnd - streamStart; +} +} // anonymous namespace + +namespace intel_npu { + +OpenvinoVersion::OpenvinoVersion(std::string_view version) + : _version(version), + _size(static_cast(version.size())) {} + +void OpenvinoVersion::read(std::istream& stream) { + stream.read(reinterpret_cast(&_size), sizeof(_size)); + _version.resize(_size); + stream.read(_version.data(), _size); +} + +void OpenvinoVersion::write(std::ostream& stream) { + stream.write(reinterpret_cast(&_size), sizeof(_size)); + stream.write(_version.data(), _size); +} + +Metadata::Metadata(uint64_t blobSize, std::optional ovVersion) + : MetadataBase{METADATA_VERSION_1_0}, + _ovVersion{ovVersion.value_or(ov::get_openvino_version().buildNumber)}, + _blobDataSize{blobSize} {} + +void Metadata::read(std::istream& stream) { + _ovVersion.read(stream); +} + +void Metadata::write(std::ostream& stream) { + stream.write(reinterpret_cast(&_version), sizeof(_version)); + _ovVersion.write(stream); + stream.write(reinterpret_cast(&_blobDataSize), sizeof(_blobDataSize)); + stream.write(MAGIC_BYTES.data(), MAGIC_BYTES.size()); +} + +std::unique_ptr create_metadata(uint32_t version, uint64_t blobSize) { + if (MetadataBase::get_major(version) == CURRENT_METADATA_MAJOR_VERSION && + MetadataBase::get_minor(version) > CURRENT_METADATA_MINOR_VERSION) { + return std::make_unique>(blobSize, std::nullopt); + } + + switch (version) { + case METADATA_VERSION_1_0: + return std::make_unique>(blobSize, std::nullopt); + + default: + OPENVINO_THROW("Invalid metadata version!"); + } +} + +std::string OpenvinoVersion::get_version() const { + return _version; +} + +bool Metadata::is_compatible() { + auto logger = Logger::global().clone("NPUBlobMetadata"); + // checking if we can import the blob + if (_ovVersion.get_version() != ov::get_openvino_version().buildNumber) { + logger.error("Imported blob OpenVINO version: %s, but the current OpenVINO version is: %s", + _ovVersion.get_version().c_str(), + ov::get_openvino_version().buildNumber); + +#ifdef NPU_PLUGIN_DEVELOPER_BUILD + if (auto envVar = std::getenv("NPU_DISABLE_VERSION_CHECK")) { + if (envVarStrToBool("NPU_DISABLE_VERSION_CHECK", envVar)) { + logger.info("Blob compatibility check skipped."); + return true; + } + } +#endif + return false; + } + return true; +} + +std::unique_ptr read_metadata_from(std::istream& stream) { + size_t magicBytesSize = MAGIC_BYTES.size(); + std::string blobMagicBytes; + blobMagicBytes.resize(magicBytesSize); + + std::streampos currentStreamPos = stream.tellg(), streamSize = getFileSize(stream); + stream.seekg(streamSize - std::streampos(magicBytesSize), std::ios::cur); + stream.read(blobMagicBytes.data(), magicBytesSize); + if (MAGIC_BYTES != blobMagicBytes) { + OPENVINO_THROW("Blob is missing NPU metadata!"); + } + + uint64_t blobDataSize; + stream.seekg(-std::streampos(magicBytesSize) - sizeof(blobDataSize), std::ios::cur); + stream.read(reinterpret_cast(&blobDataSize), 
sizeof(blobDataSize)); + stream.seekg(-stream.tellg() + currentStreamPos + blobDataSize, std::ios::cur); + + uint32_t metaVersion; + stream.read(reinterpret_cast(&metaVersion), sizeof(metaVersion)); + + std::unique_ptr storedMeta; + try { + storedMeta = create_metadata(metaVersion, blobDataSize); + storedMeta->read(stream); + } catch (const std::exception& ex) { + OPENVINO_THROW(ex.what(), + "Imported blob metadata version: ", + MetadataBase::get_major(metaVersion), + ".", + MetadataBase::get_minor(metaVersion), + " but the current version is: ", + CURRENT_METADATA_MAJOR_VERSION, + ".", + CURRENT_METADATA_MINOR_VERSION); + } catch (...) { + OPENVINO_THROW("Unexpected exception while reading blob NPU metadata"); + } + stream.seekg(-stream.tellg() + currentStreamPos, std::ios::cur); + + return storedMeta; +} + +uint64_t Metadata::get_blob_size() const { + return _blobDataSize; +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 5de2b700fe984e..301a71887054f2 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -21,6 +21,7 @@ #include "intel_npu/config/npuw.hpp" #include "intel_npu/config/runtime.hpp" #include "intel_npu/utils/zero/zero_init.hpp" +#include "metadata.hpp" #include "npuw/compiled_model.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" @@ -135,30 +136,6 @@ std::map any_copy(const ov::AnyMap& params) { return result; } -size_t getFileSize(std::istream& stream) { - auto log = Logger::global().clone("getFileSize"); - if (!stream) { - OPENVINO_THROW("Stream is in bad status! Please check the passed stream status!"); - } - - const size_t streamStart = stream.tellg(); - stream.seekg(0, std::ios_base::end); - const size_t streamEnd = stream.tellg(); - stream.seekg(streamStart, std::ios_base::beg); - - log.debug("Read blob size: streamStart=%zu, streamEnd=%zu", streamStart, streamEnd); - - if (streamEnd < streamStart) { - OPENVINO_THROW("Invalid stream size: streamEnd (", - streamEnd, - ") is not larger than streamStart (", - streamStart, - ")!"); - } - - return streamEnd - streamStart; -} - void update_log_level(const std::map& propertiesMap) { auto it = propertiesMap.find(std::string(LOG_LEVEL::key())); if (it != propertiesMap.end()) { @@ -645,10 +622,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ov::AnyMap localProperties = properties; if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { - // CACHE_DIR isn't supported with NPU_USE_NPUW - if (localProperties.count(ov::cache_dir.name()) || !_globalConfig.get().empty()) { - OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!"); - } return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); } else { // NPUW is disabled, remove the key from the properties @@ -773,7 +746,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c stream.seekg(stream_start_pos); return ov::npuw::LLMCompiledModel::deserialize(stream, shared_from_this()); } - stream.seekg(stream_start_pos); + stream.seekg(-stream.tellg() + stream_start_pos, std::ios::cur); // Drop NPUW properties if there are any ov::AnyMap npu_plugin_properties; @@ -806,7 +779,12 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); - 
auto graphSize = getFileSize(stream); + auto storedMeta = read_metadata_from(stream); + if (!storedMeta->is_compatible()) { + OPENVINO_THROW("Incompatible blob version!"); + } + + auto graphSize = storedMeta->get_blob_size(); std::vector blob(graphSize); stream.read(reinterpret_cast(blob.data()), graphSize); diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp index 4377a38df53e16..f45e30bb109849 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp @@ -19,6 +19,12 @@ INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, ::testing::ValuesIn(configsInferRequestRunTests)), InferRequestRunTests::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + RandomTensorOverZeroTensorRunTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configsInferRequestRunTests)), + InferRequestRunTests::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, RunSeqTests, ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 97dc4ed1f2201d..31b55704757b01 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -344,6 +344,115 @@ TEST_P(InferRequestRunTests, RecreateL0TensorIfNeeded) { } } +using RandomTensorOverZeroTensorRunTests = InferRequestRunTests; + +TEST_P(RandomTensorOverZeroTensorRunTests, SetRandomTensorOverZeroTensor0) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto shape_size = ov::shape_size(shape); + auto model = createModel(element::f32, shape, "N..."); + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + auto input_zero_tensor = inference_request.get_input_tensor(0); + auto* input_zero_data = input_zero_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_zero_data[i] = 5.f; + } + + inference_request.infer(); // Adds '1' to each element + + auto output_tensor = inference_request.get_output_tensor(0); + auto* output_data = output_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(output_data[i], 6.f, 1e-5) << "Expected=6, actual=" << output_data[i] << " for index " << i; + } + + float* buffer = new float[shape_size]; + ov::Tensor tensor{element::f32, shape, buffer}; + auto* input_data = tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = 9.f; + } + + inference_request.set_input_tensor(tensor); + inference_request.infer(); // Adds '1' to each element + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(output_data[i], 10.f, 1e-5) << "Expected=10, actual=" << output_data[i] << " for index " << i; + } + + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(input_zero_data[i], 5.f, 1e-5) << "Expected=5, actual=" << input_zero_data[i] << " for index " << i; + } + + delete[] buffer; +} + +TEST_P(RandomTensorOverZeroTensorRunTests, SetRandomTensorOverZeroTensor1) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + 
SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto shape = Shape{1, 2, 2, 2}; + auto shape_size = ov::shape_size(shape); + auto model = createModel(element::f32, shape, "N..."); + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request0, inference_request1; + inference_request0 = compiled_model.create_infer_request(); + inference_request1 = compiled_model.create_infer_request(); + + auto input_zero_tensor = inference_request0.get_input_tensor(0); + auto* input_zero_data = input_zero_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_zero_data[i] = 5.f; + } + + inference_request0.infer(); // Adds '1' to each element + + auto output_tensor0 = inference_request0.get_output_tensor(0); + auto* output_data0 = output_tensor0.data(); + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(output_data0[i], 6.f, 1e-5) << "Expected=6, actual=" << output_data0[i] << " for index " << i; + } + + inference_request1.set_input_tensor(output_tensor0); + inference_request1.infer(); // Adds '1' to each element + + auto output_tensor1 = inference_request1.get_output_tensor(0); + auto* output_data1 = output_tensor1.data(); + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(output_data1[i], 7.f, 1e-5) << "Expected=7, actual=" << output_data1[i] << " for index " << i; + } + + float* buffer = new float[shape_size]; + ov::Tensor tensor{element::f32, shape, buffer}; + auto* input_data = tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = 9.f; + } + + inference_request1.set_input_tensor(tensor); + inference_request1.infer(); // Adds '1' to each element + + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(output_data1[i], 10.f, 1e-5) << "Expected=10, actual=" << output_data1[i] << " for index " << i; + } + + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(output_data0[i], 6.f, 1e-5) << "Expected=6, actual=" << output_data0[i] << " for index " << i; + } + + for (size_t i = 0; i < shape_size; ++i) { + EXPECT_NEAR(input_zero_data[i], 5.f, 1e-5) << "Expected=5, actual=" << input_zero_data[i] << " for index " << i; + } + + delete[] buffer; +} + using BatchingRunTests = InferRequestRunTests; TEST_P(BatchingRunTests, CheckBatchingSupportInfer) { diff --git a/src/plugins/intel_npu/tests/unit/CMakeLists.txt b/src/plugins/intel_npu/tests/unit/CMakeLists.txt index f4e8a64ecea92b..1097e183369fe4 100644 --- a/src/plugins/intel_npu/tests/unit/CMakeLists.txt +++ b/src/plugins/intel_npu/tests/unit/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2018-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # @@ -29,6 +29,8 @@ ov_add_test_target( ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/utils/include ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/include ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/al/include + OBJECT_FILES + ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/src/metadata.cpp LINK_LIBRARIES ${MANDATORY_UNIT_TESTS_LIBS} LABELS diff --git a/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp new file mode 100644 index 00000000000000..0c94a1e5334b36 --- /dev/null +++ b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp @@ -0,0 +1,201 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "metadata.hpp" +#include "openvino/core/version.hpp" + +using namespace 
intel_npu; + +using MetadataUnitTests = ::testing::Test; + +struct MetadataTest : Metadata { + MetadataTest(uint64_t blobSize, std::optional ovVersion) + : Metadata(blobSize, ovVersion) {} + + void set_version(uint32_t newVersion) { + _version = newVersion; + } +}; + +TEST_F(MetadataUnitTests, readUnversionedBlob) { + std::stringstream blob("this_is an_unversioned bl0b"); + + std::unique_ptr storedMeta; + ASSERT_ANY_THROW(storedMeta = read_metadata_from(blob)); +} + +TEST_F(MetadataUnitTests, writeAndReadCurrentMetadataFromBlob) { + uint64_t blobSize = 0; + std::stringstream stream; + auto meta = MetadataTest(blobSize, ov::get_openvino_version().buildNumber); + + OV_ASSERT_NO_THROW(meta.write(stream)); + + std::unique_ptr storedMeta; + OV_ASSERT_NO_THROW(storedMeta = read_metadata_from(stream)); + ASSERT_TRUE(storedMeta->is_compatible()); +} + +TEST_F(MetadataUnitTests, writeAndReadInvalidOpenvinoVersion) { + uint64_t blobSize = 0; + std::stringstream stream; + auto meta = MetadataTest(blobSize, "just_some_wrong_ov_version"); + + OV_ASSERT_NO_THROW(meta.write(stream)); + + std::unique_ptr storedMeta; + OV_ASSERT_NO_THROW(storedMeta = read_metadata_from(stream)); + ASSERT_FALSE(storedMeta->is_compatible()); +} + +TEST_F(MetadataUnitTests, writeAndReadInvalidMetadataVersion) { + uint64_t blobSize = 0; + std::stringstream stream; + auto meta = MetadataTest(blobSize, std::nullopt); + + constexpr uint32_t dummyVersion = MetadataBase::make_version(0x00007E57, 0x0000AC3D); + meta.set_version(dummyVersion); + + OV_ASSERT_NO_THROW(meta.write(stream)); + ASSERT_ANY_THROW(auto storedMeta = read_metadata_from(stream)); +} + +TEST_F(MetadataUnitTests, writeAndReadMetadataWithNewerMinorVersion) { + uint64_t blobSize = 0; + std::stringstream stream; + auto meta = MetadataTest(blobSize, "some_ov_version"); + + constexpr uint32_t dummyVersion = + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION, CURRENT_METADATA_MINOR_VERSION + 1); + meta.set_version(dummyVersion); + + OV_ASSERT_NO_THROW(meta.write(stream)); + std::unique_ptr storedMeta; + OV_ASSERT_NO_THROW(storedMeta = read_metadata_from(stream)); + ASSERT_FALSE(storedMeta->is_compatible()); +} + +struct MetadataVersionTestFixture : Metadata, ::testing::TestWithParam { +public: + std::stringstream blob; + + void set_version(uint32_t newVersion) { + _version = newVersion; + } + + MetadataVersionTestFixture() : Metadata(0, std::nullopt) {} + + MetadataVersionTestFixture(uint64_t blobSize, std::optional ovVersion) + : Metadata(blobSize, ovVersion) {} + + void TestBody() override {} + + static std::string getTestCaseName(testing::TestParamInfo info); +}; + +std::string MetadataVersionTestFixture::getTestCaseName( + testing::TestParamInfo info) { + std::ostringstream result; + result << "major version=" << MetadataBase::get_major(info.param) + << ", minor version=" << MetadataBase::get_minor(info.param); + return result.str(); +} + +TEST_P(MetadataVersionTestFixture, writeAndReadInvalidMetadataVersion) { + uint32_t metaVersion = GetParam(); + if (CURRENT_METADATA_MAJOR_VERSION == MetadataBase::get_major(metaVersion) && CURRENT_METADATA_MINOR_VERSION == 0) { + GTEST_SKIP() << "Skipping single test since there is no case of lower minor version than actual."; + } + + MetadataVersionTestFixture dummyMeta = MetadataVersionTestFixture(0, "some_ov_version"); + dummyMeta.set_version(metaVersion); + + OV_ASSERT_NO_THROW(dummyMeta.write(blob)); + EXPECT_ANY_THROW(read_metadata_from(blob)); + ASSERT_FALSE(dummyMeta.is_compatible()); +} + +const std::vector 
badMetadataVersions = { + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION, CURRENT_METADATA_MINOR_VERSION - 1), + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION + 1, CURRENT_METADATA_MINOR_VERSION), + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION + 1, CURRENT_METADATA_MINOR_VERSION + 1), + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION + 1, CURRENT_METADATA_MINOR_VERSION - 1), + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION - 1, CURRENT_METADATA_MINOR_VERSION), + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION - 1, CURRENT_METADATA_MINOR_VERSION + 1), + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION - 1, CURRENT_METADATA_MINOR_VERSION - 1)}; + +INSTANTIATE_TEST_SUITE_P(MetadataUnitTests, + MetadataVersionTestFixture, + ::testing::ValuesIn(badMetadataVersions), + MetadataVersionTestFixture::getTestCaseName); + +TEST_F(MetadataUnitTests, writeAndReadMetadataWithNewerFieldAtEnd) { + uint64_t blobSize = 0; + std::stringstream stream; + auto meta = MetadataTest(blobSize, "some_ov_version"); + + constexpr uint32_t dummyVersion = + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION, CURRENT_METADATA_MINOR_VERSION + 1); + meta.set_version(dummyVersion); + + OV_ASSERT_NO_THROW(meta.write(stream)); + + // inserting a new field at the end of the blob, between last metadata field and blobDataSize + std::string temp = stream.str(); + size_t offset = MAGIC_BYTES.size() + sizeof(uint64_t); + temp.insert(temp.length() - offset, "new metadata field"); + stream.str(""); + stream << temp; + + std::unique_ptr storedMeta; + OV_ASSERT_NO_THROW(storedMeta = read_metadata_from(stream)); + ASSERT_FALSE(storedMeta->is_compatible()); +} + +TEST_F(MetadataUnitTests, writeAndReadMetadataWithNewerFieldAtMiddle) { + uint64_t blobSize = 0; + std::stringstream stream; + auto meta = MetadataTest(blobSize, "some_ov_version"); + + constexpr uint32_t dummyVersion = + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION + 1, CURRENT_METADATA_MINOR_VERSION); + meta.set_version(dummyVersion); + + OV_ASSERT_NO_THROW(meta.write(stream)); + + // inserting a new field at the middle of the blob, between metadata version and OV version size + std::string temp = stream.str(); + size_t offset = sizeof(CURRENT_METADATA_VERSION); + temp.insert(offset, "new metadata field"); + stream.str(""); + stream << temp; + + std::unique_ptr storedMeta; + EXPECT_ANY_THROW(storedMeta = read_metadata_from(stream)); +} + +TEST_F(MetadataUnitTests, writeAndReadMetadataWithRemovedField) { + uint64_t blobSize = 0; + std::stringstream stream; + auto meta = MetadataTest(blobSize, "some_ov_version"); + + constexpr uint32_t dummyVersion = + MetadataBase::make_version(CURRENT_METADATA_MAJOR_VERSION + 1, CURRENT_METADATA_MINOR_VERSION); + meta.set_version(dummyVersion); + + OV_ASSERT_NO_THROW(meta.write(stream)); + + // removing fields between metadata version and blob data size + std::string temp = stream.str(); + size_t offset = sizeof(CURRENT_METADATA_VERSION), size = offset + MAGIC_BYTES.size() + sizeof(uint64_t); + temp.replace(offset, temp.length() - size, ""); + stream.str(""); + stream << temp; + + std::unique_ptr storedMeta; + EXPECT_ANY_THROW(storedMeta = read_metadata_from(stream)); +} diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp index d2d21da878b3c6..a05723b24b8d34 100644 --- 
a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp @@ -91,12 +91,12 @@ generate_model(ov::NodeVector& nodes, // cloned_in_node is parameter or constant, it could have only one input ov::replace_output_update_name(cloned_in_node->output(cloned_in_node_out_idx), orig_in_node->output(out_idx)); if (ov::op::util::is_parameter(orig_in_node)) { - auto param = std::dynamic_pointer_cast(orig_in_node); + auto param = ov::as_type_ptr(orig_in_node); model_parameters.push_back(param); node_input_info.insert({ orig_in_node->get_friendly_name(), node_input_info[cloned_in_node_name]}); } else if (ov::op::util::is_constant(orig_in_node)) { - auto op_to_replace = std::dynamic_pointer_cast(orig_in_node); + auto op_to_replace = ov::as_type_ptr(orig_in_node); auto param = convert_const_to_param(op_to_replace); if (param != nullptr) { model_parameters.push_back(param); @@ -104,7 +104,7 @@ generate_model(ov::NodeVector& nodes, node_input_info.insert({ orig_in_node->get_friendly_name(), node_input_info[cloned_in_node_name]}); } else if (ov::op::util::is_sink(cloned_node)) { - model_sinks.push_back(std::dynamic_pointer_cast(cloned_node->shared_from_this())); + model_sinks.push_back(ov::as_type_ptr(cloned_node->shared_from_this())); } filled_input_idx++; // clean up replaced node data @@ -114,10 +114,10 @@ generate_model(ov::NodeVector& nodes, model_output_nodes.erase(orig_in_node_name); } } else if (ov::op::util::is_parameter(cloned_in_node)) { - auto param = std::dynamic_pointer_cast(cloned_in_node); + auto param = ov::as_type_ptr(cloned_in_node); model_parameters.push_back(param); } else if (ov::op::util::is_constant(cloned_in_node)) { - auto op_to_replace = std::dynamic_pointer_cast(cloned_in_node); + auto op_to_replace = ov::as_type_ptr(cloned_in_node); auto param = convert_const_to_param(op_to_replace); if (param != nullptr) { model_parameters.push_back(param); @@ -140,7 +140,7 @@ generate_model(ov::NodeVector& nodes, for (const auto& out_node_name : model_output_nodes) { auto out_node = cloned_node_map[out_node_name.first]; if (ov::op::util::is_output(out_node)) { - model_results.push_back(std::dynamic_pointer_cast(out_node)); + model_results.push_back(ov::as_type_ptr(out_node)); } else { for (const auto& out_port_id : out_node_name.second) { model_results.push_back(std::make_shared(out_node->output(out_port_id))); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp index f1550853fec90e..e31502f3d98374 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/op_cache.cpp @@ -22,28 +22,28 @@ void OpCache::update_cache(const std::shared_ptr& model, std::cout << "[ INFO ][ OP CACHE ] Processing model: " << model_path << std::endl; size_t model_op_cnt = model->get_ops().size() - model->get_output_size() - model->inputs().size(); for (const auto& op : model->get_ordered_ops()) { - if (std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op) || + if (ov::as_type_ptr(op) || + ov::as_type_ptr(op) || + ov::as_type_ptr(op) || // ReadValue and Assign have to be handled in pair // Will be handled as part of 48838 - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) { + ov::as_type_ptr(op) || + ov::as_type_ptr(op)) { 
continue; } if (extract_body) { - if (std::dynamic_pointer_cast(op)) { - auto if_op = std::dynamic_pointer_cast(op); + if (ov::as_type_ptr(op)) { + auto if_op = ov::as_type_ptr(op); for (size_t i = 0; i < if_op->get_internal_subgraphs_size(); i++) { auto if_body = if_op->get_function(i); update_cache(if_body, model_path, extract_body, from_cache); } - } else if (std::dynamic_pointer_cast(op)) { - auto loop = std::dynamic_pointer_cast(op); + } else if (ov::as_type_ptr(op)) { + auto loop = ov::as_type_ptr(op); auto loop_body = loop->get_function(); update_cache(loop_body, model_path, extract_body, from_cache); - } else if (std::dynamic_pointer_cast(op)) { - auto ti = std::dynamic_pointer_cast(op); + } else if (ov::as_type_ptr(op)) { + auto ti = ov::as_type_ptr(op); auto ti_body = ti->get_function(); update_cache(ti_body, model_path, extract_body, from_cache); } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/convolutions.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/convolutions.cpp index c5504b014c49a3..1f95077d285560 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/convolutions.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/convolutions.cpp @@ -47,8 +47,8 @@ bool ConvolutionsMatcher::match_inputs(const std::shared_ptr &node, if (!SingleOpMatcher::match_inputs(node, ref)) { return false; } - bool has_groups = std::dynamic_pointer_cast(node) || - std::dynamic_pointer_cast(node); + bool has_groups = ov::as_type_ptr(node) || + ov::as_type_ptr(node); size_t kernel_size_offset = has_groups ? 3 : 2; auto ref_weights_shape = ref->get_input_partial_shape(1).get_shape(); auto cur_weights_shape = node->get_input_partial_shape(1).get_shape(); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp index 7c35658b361098..402d924a24f188 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp @@ -207,17 +207,17 @@ FusedNamesExtractor::extract(const std::shared_ptr &model) { nodes.push_back(op); } if (is_extract_body) { - if (std::dynamic_pointer_cast(op)) { + if (ov::as_type_ptr(op)) { auto ti = ov::as_type_ptr(op); auto ti_body = ti->get_function(); auto tmp_res = extract(ti_body); matched_patterns.insert(matched_patterns.end(), tmp_res.begin(), tmp_res.end()); - } else if (std::dynamic_pointer_cast(op)) { + } else if (ov::as_type_ptr(op)) { auto loop = ov::as_type_ptr(op); auto loop_body = loop->get_function(); auto tmp_res = extract(loop_body); matched_patterns.insert(matched_patterns.end(), tmp_res.begin(), tmp_res.end()); - } else if (std::dynamic_pointer_cast(op)) { + } else if (ov::as_type_ptr(op)) { auto if_op = ov::as_type_ptr(op); std::vector> bodies; for (size_t i = 0; i < if_op->get_internal_subgraphs_size(); i++) { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/read_value_assign.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/read_value_assign.cpp index e0ab7f29857efa..485b4dd2d98026 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/read_value_assign.cpp +++ 
b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/read_value_assign.cpp @@ -24,10 +24,10 @@ ReadValueAssignExtractor::extract(const std::shared_ptr &model) { }; std::map pairs; for (auto& node : model->get_ordered_ops()) { - if (const auto& assign = std::dynamic_pointer_cast(node)) { + if (const auto& assign = ov::as_type_ptr(node)) { pairs[assign->get_variable()].cnt_assign++; pairs[assign->get_variable()].variable_id = assign->get_variable()->get_info().variable_id; - } else if (const auto& read_value = std::dynamic_pointer_cast(node)) { + } else if (const auto& read_value = ov::as_type_ptr(node)) { pairs[read_value->get_variable()].cnt_read_val++; pairs[read_value->get_variable()].rv = read_value; pairs[read_value->get_variable()].variable_id = read_value->get_variable()->get_info().variable_id; @@ -49,7 +49,7 @@ ReadValueAssignExtractor::extract(const std::shared_ptr &model) { while (bfs_queue.size() != 0) { auto node = bfs_queue.front(); all_extracted_nodes.push_back(node); - if (const auto& assign = std::dynamic_pointer_cast(node)) { + if (const auto& assign = ov::as_type_ptr(node)) { if (assign->get_variable()->get_info().variable_id == pair.second.variable_id) { break; } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp index be0e21a1b3d0d4..0c95ab587e8bfe 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp @@ -306,17 +306,17 @@ RepeatPatternExtractor::find_repeat_patterns(const std::shared_ptr &m if (is_extract_body) { for (const auto& matched_node_idx : matched_nodes) { const auto& matched_node = ordered_ops[matched_node_idx]; - if (std::dynamic_pointer_cast(matched_node)) { + if (ov::as_type_ptr(matched_node)) { auto ti = ov::as_type_ptr(matched_node); auto ti_body = ti->get_function(); auto secondary_patterns = find_repeat_patterns(ti_body, is_save_borders_only); update_extractor_cache(extracted_patterns, secondary_patterns); - } else if (std::dynamic_pointer_cast(matched_node)) { + } else if (ov::as_type_ptr(matched_node)) { auto loop = ov::as_type_ptr(matched_node); auto loop_body = loop->get_function(); auto secondary_patterns = find_repeat_patterns(loop_body, is_save_borders_only); update_extractor_cache(extracted_patterns, secondary_patterns); - } else if (std::dynamic_pointer_cast(matched_node)) { + } else if (ov::as_type_ptr(matched_node)) { auto if_op = ov::as_type_ptr(matched_node); std::vector> bodies; for (size_t i = 0; i < if_op->get_internal_subgraphs_size(); i++) { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp index 6b77e62873d851..c9e1cd11c3c50c 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp @@ -22,7 +22,7 @@ get_input_info_by_model(const std::shared_ptr& model) { ov::conformance::InputInfo::Range ranges(ov::conformance::DEFAULT_MIN_VALUE, ov::conformance::DEFAULT_MAX_VALUE); bool is_const = false; if (ov::shape_size(node->get_output_shape(0)) != 0 && ov::op::util::is_constant(node)) { - std::shared_ptr constant = std::dynamic_pointer_cast(node); + std::shared_ptr 
constant = ov::as_type_ptr(node); auto const_ranges = get_const_ranges(constant, constant->get_default_output().get_element_type()); ranges = const_ranges; @@ -82,14 +82,14 @@ bool is_same_paired_op_cnt(const std::shared_ptr &fist_model, size_t second_paired_op_cnt = 0; for (auto& node : fist_model->get_ordered_ops()) { - if (std::dynamic_pointer_cast(node) || - std::dynamic_pointer_cast(node)) + if (ov::as_type_ptr(node) || + ov::as_type_ptr(node)) fist_paired_op_cnt++; } for (auto& node : second_model->get_ordered_ops()) { - if (std::dynamic_pointer_cast(node) || - std::dynamic_pointer_cast(node)) + if (ov::as_type_ptr(node) || + ov::as_type_ptr(node)) second_paired_op_cnt++; } @@ -99,11 +99,11 @@ bool is_same_paired_op_cnt(const std::shared_ptr &fist_model, bool build_control_dependency(std::shared_ptr &model) { std::map, std::shared_ptr>> dependency_pairs; for (auto& node : model->get_ordered_ops()) { - if (const auto& read_value = std::dynamic_pointer_cast(node)) { + if (const auto& read_value = ov::as_type_ptr(node)) { dependency_pairs[read_value->get_variable_id()].first = read_value; } - if (const auto& assign = std::dynamic_pointer_cast(node)) { + if (const auto& assign = ov::as_type_ptr(node)) { dependency_pairs[assign->get_variable_id()].second = assign; } } @@ -119,4 +119,4 @@ bool build_control_dependency(std::shared_ptr &model) { } } // namespace util -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp index a59905d4ba8ae9..8d24da58ce1941 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/node.cpp @@ -86,7 +86,7 @@ get_input_info_by_node(const std::shared_ptr& node) { } ov::conformance::InputInfo in_info(node->get_input_partial_shape(port_id)); std::string input_name = input_node->get_friendly_name(); - if (std::dynamic_pointer_cast(input_node)) { + if (ov::as_type_ptr(input_node)) { if (ov::shape_size(input_node->get_output_shape(0)) == 0) { auto const_node = ov::as_type_ptr(input_node); in_info.is_const = true; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp index ab987a2140a5e1..7d8c1743b2cb74 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp @@ -116,10 +116,10 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { // check cache ASSERT_EQ(m_ops_cache.size(), 2); for (const auto& cached_node : this->m_ops_cache) { - ASSERT_TRUE(std::dynamic_pointer_cast(cached_node.first) || - std::dynamic_pointer_cast(cached_node.first)); + ASSERT_TRUE(ov::as_type_ptr(cached_node.first) || + ov::as_type_ptr(cached_node.first)); auto meta = cached_node.second; - if (std::dynamic_pointer_cast(cached_node.first)) { + if (ov::as_type_ptr(cached_node.first)) { // check model_path ASSERT_EQ(meta.get_model_info().size(), 1); ASSERT_EQ(meta.get_model_info().begin()->first, test_model_name); diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/utils/generate_static_shapes.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/utils/generate_static_shapes.cpp index 
766de0fa314e36..a2ef484083b862 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/utils/generate_static_shapes.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/utils/generate_static_shapes.cpp @@ -50,7 +50,7 @@ namespace { InputShape generate(const std::shared_ptr& node, size_t in_port_id) { - const auto& param = std::dynamic_pointer_cast(node->get_input_node_shared_ptr(in_port_id)); + const auto& param = ov::as_type_ptr(node->get_input_node_shared_ptr(in_port_id)); std::vector staticShapes = { param->get_partial_shape().get_min_shape(), generate_mid_shape(param->get_partial_shape()), param->get_partial_shape().get_max_shape() }; diff --git a/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp b/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp index 74062a8ddb98a9..ffbec42fb1fac0 100644 --- a/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp @@ -353,7 +353,7 @@ TEST_P(OVCompiledModelBaseTestOptional, CheckExecGraphInfoBeforeExecution) { } int constCnt = 0; - std::shared_ptr getFunction = std::dynamic_pointer_cast(execGraph); + std::shared_ptr getFunction = ov::as_type_ptr(execGraph); ASSERT_NE(getFunction, nullptr); for (const auto& op : getFunction->get_ops()) { @@ -405,7 +405,7 @@ TEST_P(OVCompiledModelBaseTestOptional, CheckExecGraphInfoAfterExecution) { int constCnt = 0; // Store all the layers from the executable graph information represented as CNNNetwork bool hasOpWithValidTime = false; - auto getFunction = std::dynamic_pointer_cast(execGraph); + auto getFunction = ov::as_type_ptr(execGraph); ASSERT_NE(nullptr, getFunction); for (const auto& op : getFunction->get_ops()) { diff --git a/src/tests/functional/shared_test_classes/src/base/utils/calculate_thresholds.cpp b/src/tests/functional/shared_test_classes/src/base/utils/calculate_thresholds.cpp index 92697e84e91114..a21ec8dd91e7a5 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/calculate_thresholds.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/calculate_thresholds.cpp @@ -21,7 +21,7 @@ calculate_thresholds_by_whole_model(const std::shared_ptr& model) { // check all operations except convert to generate correct values for (const auto& op : model->get_ordered_ops()) { - if (std::dynamic_pointer_cast(op)) { + if (ov::as_type_ptr(op)) { continue; } // check the default threshold for operations diff --git a/src/tests/functional/shared_test_classes/src/single_op/comparison.cpp b/src/tests/functional/shared_test_classes/src/single_op/comparison.cpp index 7fea75f338f0be..0d63044cdb495f 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/comparison.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/comparison.cpp @@ -66,7 +66,7 @@ void ComparisonLayerTest::SetUp() { std::shared_ptr second_input; if (second_input_type == InputLayerType::PARAMETER) { second_input = std::make_shared(model_type, inputDynamicShapes[1]); - inputs.push_back(std::dynamic_pointer_cast(second_input)); + inputs.push_back(ov::as_type_ptr(second_input)); } else { ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(model_type, targetStaticShapes.front()[1]); second_input = std::make_shared(tensor); diff --git a/src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp 
b/src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp index f322aeffb2c62b..a20b59e5113032 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp @@ -47,7 +47,7 @@ void ReverseSequenceLayerTest::SetUp() { secondary_input = std::make_shared(tensor); } else if (ov::test::utils::InputLayerType::PARAMETER == secondary_input_type) { secondary_input = std::make_shared(second_data_type, ov::Shape(second_input_shape)); - params.push_back(std::dynamic_pointer_cast(secondary_input)); + params.push_back(ov::as_type_ptr(secondary_input)); } else { throw std::runtime_error("Unsupported input type"); } diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp index b9b122b35b6a69..3208ab16ec4ac1 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp @@ -70,7 +70,7 @@ void QuantConvBackpropDataLayerTest::SetUp() { auto weightsFq = ov::test::utils::make_fake_quantize(weightsNode, element_type, quantLevels, weightsFqConstShapes); - auto convBackpropData = std::dynamic_pointer_cast( + auto convBackpropData = ov::as_type_ptr( ov::test::utils::make_convolution_backprop_data(dataFq, weightsFq, element_type, stride, padBegin, padEnd, dilation, padType)); ov::ResultVector results{std::make_shared(convBackpropData)}; diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp index cb213749d9c997..0e64399cd69494 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp @@ -84,7 +84,7 @@ void QuantGroupConvLayerTest::SetUp() { weights = weightsNode; } - auto groupConv = std::dynamic_pointer_cast( + auto groupConv = ov::as_type_ptr( ov::test::utils::make_group_convolution(dataFq, weights, element_type, stride, padBegin, padEnd, dilation, padType)); ov::ResultVector results{std::make_shared(groupConv)}; diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp index 45492b04c47302..86ffe543a0bd60 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp @@ -77,7 +77,7 @@ void QuantGroupConvBackpropDataLayerTest::SetUp() { auto weightsFq = ov::test::utils::make_fake_quantize(weightsNode, element_type, quantLevels, weightsFqConstShapes); - auto groupConvBackpropData = std::dynamic_pointer_cast( + auto groupConvBackpropData = ov::as_type_ptr( ov::test::utils::make_group_convolution_backprop_data(dataFq, weightsFq, element_type, stride, padBegin, padEnd, dilation, padType)); ov::ResultVector results{std::make_shared(groupConvBackpropData)}; diff --git a/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp b/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp index 25dad9a6ba7c29..6d5ac45d5c74c1 100644 --- 
a/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp +++ b/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp @@ -109,7 +109,7 @@ std::shared_ptr ElementwiseFunction::getOriginalSubgraphWithConvoluti result = std::make_shared(result); result->set_friendly_name("result"); - ov::ResultVector results{ std::dynamic_pointer_cast(result) }; + ov::ResultVector results{ ov::as_type_ptr(result) }; return std::make_shared(results, ov::ParameterVector{ branch1.first, branch2.first }, "AddTransformation"); } diff --git a/src/tests/ov_helpers/ov_lpt_models/src/multiply.cpp b/src/tests/ov_helpers/ov_lpt_models/src/multiply.cpp index ace041eae73f08..0454a5f2a58889 100644 --- a/src/tests/ov_helpers/ov_lpt_models/src/multiply.cpp +++ b/src/tests/ov_helpers/ov_lpt_models/src/multiply.cpp @@ -69,10 +69,10 @@ std::shared_ptr MultiplyFunction::get(const ov::element::Type model_p ov::ParameterVector inputs; if (ov::is_type(branchNodes1.input)) { - inputs.push_back(std::dynamic_pointer_cast(branchNodes1.input)); + inputs.push_back(ov::as_type_ptr(branchNodes1.input)); } if (ov::is_type(branchNodes2.input)) { - inputs.push_back(std::dynamic_pointer_cast(branchNodes2.input)); + inputs.push_back(ov::as_type_ptr(branchNodes2.input)); } return std::make_shared(results, inputs, "MultiplyTransformation"); diff --git a/src/tests/ov_helpers/ov_lpt_models/src/multiply_partial_function.cpp b/src/tests/ov_helpers/ov_lpt_models/src/multiply_partial_function.cpp index 68c4bb8433c1fd..742f3a90aaf0af 100644 --- a/src/tests/ov_helpers/ov_lpt_models/src/multiply_partial_function.cpp +++ b/src/tests/ov_helpers/ov_lpt_models/src/multiply_partial_function.cpp @@ -69,10 +69,10 @@ std::shared_ptr MultiplyPartialFunction::get(const ov::element::Type ov::ParameterVector inputs; if (ov::is_type(branchNodes1.input)) { - inputs.push_back(std::dynamic_pointer_cast(branchNodes1.input)); + inputs.push_back(ov::as_type_ptr(branchNodes1.input)); } if (ov::is_type(branchNodes2.input)) { - inputs.push_back(std::dynamic_pointer_cast(branchNodes2.input)); + inputs.push_back(ov::as_type_ptr(branchNodes2.input)); } return std::make_shared(results, inputs, "MultiplyTransformation"); diff --git a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp index 4b159890ddebae..5d361e1e2276db 100644 --- a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp +++ b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp @@ -579,10 +579,10 @@ class CompareSubGraphs { } static int64_t get_num_iterations(ov::op::util::SubGraphOp* sub) { - if (const auto ti = dynamic_cast(sub)) { + if (const auto ti = ov::as_type(sub)) { return ti->get_num_iterations(); } - if (const auto l = dynamic_cast(sub)) { + if (const auto l = ov::as_type(sub)) { return l->get_num_iterations(); } @@ -724,8 +724,8 @@ Comparator::Result Comparator::compare(ov::Node* node1, ov::Node* node2, std::os typeInfoToStr(type_info1) + " != " + typeInfoToStr(type_info2)); } - auto subgraph1 = dynamic_cast(node1); - auto subgraph2 = dynamic_cast(node2); + auto subgraph1 = ov::as_type(node1); + auto subgraph2 = ov::as_type(node2); const bool subgraph_nodes = subgraph1 && subgraph2; diff --git a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp index 9c52c94a29ca9b..157c0e6628980b 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp @@ -157,7 
diff --git a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp
index 9c52c94a29ca9b..157c0e6628980b 100644
--- a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp
+++ b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp
@@ -157,7 +157,7 @@ ov::TensorVector infer_on_template(const std::shared_ptr& model,
 bool is_tensor_iterator_exist(const std::shared_ptr& model) {
     const auto& ops = model->get_ops();
     for (const auto& node : ops) {
-        const auto& ti = std::dynamic_pointer_cast(node);
+        const auto& ti = ov::as_type_ptr(node);
         if (ti) {
             return true;
         }
diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp
index a02645170cbc60..66f11b87e4d345 100644
--- a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp
+++ b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp
@@ -143,9 +143,8 @@ std::map OpSummary::getStatisticFromReport() {
 void OpSummary::updateOPsStats(const std::shared_ptr& model, const PassRate::Statuses& status, double k) {
     bool isFunctionalGraph = false;
     for (const auto& op : model->get_ordered_ops()) {
-        if (!std::dynamic_pointer_cast(op) &&
-            !std::dynamic_pointer_cast(op) &&
-            !std::dynamic_pointer_cast(op)) {
+        if (!ov::as_type_ptr(op) && !ov::as_type_ptr(op) &&
+            !ov::as_type_ptr(op)) {
             // find all features
             isFunctionalGraph = true;
             break;
@@ -153,24 +152,23 @@ void OpSummary::updateOPsStats(const std::shared_ptr& model, const Pa
     }
     for (const auto& op : model->get_ordered_ops()) {
-        if ((std::dynamic_pointer_cast(op) ||
-             std::dynamic_pointer_cast(op) ||
-             std::dynamic_pointer_cast(op)) &&
+        if ((ov::as_type_ptr(op) || ov::as_type_ptr(op) ||
+             ov::as_type_ptr(op)) &&
            isFunctionalGraph) {
            continue;
        }
        if (extractBody) {
-            if (std::dynamic_pointer_cast(op)) {
+            if (ov::as_type_ptr(op)) {
                updateOPsStats(op->get_type_info(), status, k);
                auto ti = ov::as_type_ptr(op);
                auto ti_body = ti->get_function();
                updateOPsStats(ti_body, status, k);
-            } else if (std::dynamic_pointer_cast(op)) {
+            } else if (ov::as_type_ptr(op)) {
                updateOPsStats(op->get_type_info(), status, k);
                auto loop = ov::as_type_ptr(op);
                auto loop_body = loop->get_function();
                updateOPsStats(loop_body, status, k);
-            } else if (std::dynamic_pointer_cast(op)) {
+            } else if (ov::as_type_ptr(op)) {
                updateOPsStats(op->get_type_info(), status, k);
                auto if_op = ov::as_type_ptr(op);
                std::vector> bodies;
@@ -190,26 +188,24 @@ void OpSummary::updateOPsImplStatus(const std::shared_ptr& model, con
     }
     bool isFunctionalGraph = false;
     for (const auto& op : model->get_ordered_ops()) {
-        if (!std::dynamic_pointer_cast(op) &&
-            !std::dynamic_pointer_cast(op) &&
-            !std::dynamic_pointer_cast(op)) {
+        if (!ov::as_type_ptr(op) && !ov::as_type_ptr(op) &&
+            !ov::as_type_ptr(op)) {
            isFunctionalGraph = true;
            break;
        }
     }
     for (const auto& op : model->get_ordered_ops()) {
-        if ((std::dynamic_pointer_cast(op) ||
-             std::dynamic_pointer_cast(op) ||
-             std::dynamic_pointer_cast(op)) &&
+        if ((ov::as_type_ptr(op) || ov::as_type_ptr(op) ||
+             ov::as_type_ptr(op)) &&
            isFunctionalGraph) {
            continue;
-        } else if (std::dynamic_pointer_cast(op)) {
+        } else if (ov::as_type_ptr(op)) {
            updateOPsImplStatus(op->get_type_info(), implStatus);
            auto ti = ov::as_type_ptr(op);
            auto ti_body = ti->get_function();
            updateOPsImplStatus(ti_body, implStatus);
-        } else if (std::dynamic_pointer_cast(op)) {
+        } else if (ov::as_type_ptr(op)) {
            updateOPsImplStatus(op->get_type_info(), implStatus);
            auto loop = ov::as_type_ptr(op);
            auto loop_body = loop->get_function();
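Note on the op_summary.cpp hunks above: both update loops first decide whether the model contains anything beyond graph plumbing, then skip the plumbing ops while collecting statistics, and, when extractBody is set, recurse into the bodies of control-flow ops. The template arguments are stripped in this patch view; assuming the skipped plumbing types are Parameter, Constant and Result, the first check reduces to a sketch like this:

    // Illustrative only, not part of the patch.
    #include <memory>
    #include <openvino/core/model.hpp>
    #include <openvino/core/type.hpp>
    #include <openvino/op/constant.hpp>
    #include <openvino/op/parameter.hpp>
    #include <openvino/op/result.hpp>

    bool has_functional_ops(const std::shared_ptr<ov::Model>& model) {
        for (const auto& op : model->get_ordered_ops()) {
            if (!ov::as_type_ptr<ov::op::v0::Parameter>(op) &&
                !ov::as_type_ptr<ov::op::v0::Constant>(op) &&
                !ov::as_type_ptr<ov::op::v0::Result>(op)) {
                return true;  // at least one op that is not just graph plumbing
            }
        }
        return false;
    }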
diff --git a/tests/layer_tests/pytorch_tests/test_permute.py b/tests/layer_tests/pytorch_tests/test_permute.py
index 4089ae12149cea..d8fb94145bada7 100644
--- a/tests/layer_tests/pytorch_tests/test_permute.py
+++ b/tests/layer_tests/pytorch_tests/test_permute.py
@@ -26,13 +26,14 @@ def forward(self, x):
         return aten_permute(order), ref_net, "aten::permute"
-    @pytest.mark.parametrize("order", [[0, 2, 3, 1], [0, 3, 1, 2]])
+    @pytest.mark.parametrize("order", [[0, 2, 3, 1], [0, 3, 1, 2], [0, -1, 1, -2]])
     @pytest.mark.nightly
     @pytest.mark.precommit
     @pytest.mark.precommit_torch_export
     def test_permute(self, order, ie_device, precision, ir_version):
         self._test(*self.create_model(order), ie_device, precision, ir_version)
+
 class TestPermuteList(PytorchLayerTest):
     def _prepare_input(self, permute_shape):
         import numpy as np
@@ -55,6 +56,6 @@ def forward(self, x, y):
     @pytest.mark.nightly
     @pytest.mark.precommit
     @pytest.mark.precommit_torch_export
-    def test_permute(self, order, ie_device, precision, ir_version):
+    def test_permute_list(self, order, ie_device, precision, ir_version):
         self._test(*self.create_model(), ie_device, precision, ir_version,
                    kwargs_to_prepare_input={"permute_shape": order}, dynamic_shapes=ie_device != "GPU")
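Note on the test_permute.py hunk above: the new parametrization [0, -1, 1, -2] exercises negative dimension indices in aten::permute, which address axes from the end of the shape. For a rank-4 input it resolves to the same order as [0, 3, 1, 2]; the normalization is just an offset by the rank, as this small sketch illustrates (the helper name is hypothetical):

    // Illustrative only, not part of the patch.
    #include <cstdint>
    #include <vector>

    std::vector<int64_t> normalize_permute_order(std::vector<int64_t> order, int64_t rank) {
        for (auto& axis : order) {
            if (axis < 0) {
                axis += rank;  // -1 -> rank - 1, -2 -> rank - 2, ...
            }
        }
        return order;
    }

    // normalize_permute_order({0, -1, 1, -2}, 4) yields {0, 3, 1, 2}.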
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TensorScatterAdd.py b/tests/layer_tests/tensorflow_tests/test_tf_TensorScatterAdd.py
new file mode 100644
index 00000000000000..392469646b2803
--- /dev/null
+++ b/tests/layer_tests/tensorflow_tests/test_tf_TensorScatterAdd.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+import tensorflow as tf
+from common.tf_layer_test_class import CommonTFLayerTest
+
+rng = np.random.default_rng(872173)
+
+
+class TestTensorScatterAdd(CommonTFLayerTest):
+    def _prepare_input(self, inputs_info):
+        assert 'tensor:0' in inputs_info
+        assert 'indices:0' in inputs_info
+        assert 'updates:0' in inputs_info
+
+        tensor_shape = inputs_info['tensor:0']
+        updates_shape = inputs_info['updates:0']
+        indices_shape = inputs_info['indices:0']
+
+        inputs_data = {}
+        if np.issubdtype(self.data_type, np.floating):
+            inputs_data['tensor:0'] = rng.uniform(-5.0, 5.0, tensor_shape).astype(self.data_type)
+            inputs_data['updates:0'] = rng.uniform(-5.0, 5.0, updates_shape).astype(self.data_type)
+        elif np.issubdtype(self.data_type, np.signedinteger):
+            inputs_data['tensor:0'] = rng.integers(-8, 8, tensor_shape).astype(self.data_type)
+            inputs_data['updates:0'] = rng.integers(-8, 8, updates_shape).astype(self.data_type)
+        else:
+            inputs_data['tensor:0'] = rng.integers(0, 8, tensor_shape).astype(self.data_type)
+            inputs_data['updates:0'] = rng.integers(0, 8, updates_shape).astype(self.data_type)
+
+        indices_rows, indices_col = indices_shape
+
+        indices_of_tensor_shape = []
+        for i in range(0, indices_col):
+            indices_of_tensor_shape.append(np.arange(tensor_shape[i]))
+
+        mesh = np.meshgrid(*indices_of_tensor_shape)
+
+        all_indicies = np.stack(mesh, axis=indices_col)
+        all_indicies = all_indicies.reshape(-1, all_indicies.shape[-1])
+
+        inputs_data['indices:0'] = rng.choice(all_indicies, indices_rows, replace=False).astype(self.indices_type)
+
+        return inputs_data
+
+    def create_tensor_scatter_add_net(self, data_type, indices_type, tensor_shape, updates_shape, indices_shape):
+        self.data_type = data_type
+        self.indices_type = indices_type
+        self.tensor_shape = tensor_shape
+        self.updates_shape = updates_shape
+        self.indices_shape = indices_shape
+        tf.compat.v1.reset_default_graph()
+        with tf.compat.v1.Session() as sess:
+            indices = tf.compat.v1.placeholder(indices_type, indices_shape, 'indices')
+            tensor = tf.compat.v1.placeholder(data_type, tensor_shape, 'tensor')
+            updates = tf.compat.v1.placeholder(data_type, updates_shape, 'updates')
+            tf.raw_ops.TensorScatterAdd(
+                tensor=tensor,
+                indices=indices,
+                updates=updates)
+            tf.compat.v1.global_variables_initializer()
+            tf_net = sess.graph_def
+
+        ref_net = None
+
+        return tf_net, ref_net
+
+    @pytest.mark.parametrize('data_type', [np.float32, np.float64, np.int32])
+    @pytest.mark.parametrize('indices_type', [np.int32, np.int64])
+    @pytest.mark.parametrize('tensor_shape, updates_shape, indices_shape', [
+        [[10, 5], [2], [2, 2]],
+        [[4, 4, 4], [2, 4, 4], [2, 1]],
+        [[2, 4, 8], [3], [3, 3]],
+        [[4, 3, 5], [1, 5], [1, 2]],
+    ])
+    @pytest.mark.precommit
+    @pytest.mark.nightly
+    def test_tensor_scatter_add(self, data_type, indices_type,
+                                tensor_shape, updates_shape, indices_shape,
+                                ie_device, precision, ir_version, temp_dir,
+                                use_legacy_frontend):
+        if ie_device == 'GPU':
+            pytest.skip("160549: ScatterNDUpdate(opset15) is not supported on GPU")
+        self._test(*self.create_tensor_scatter_add_net(data_type, indices_type,
+                                                       tensor_shape, updates_shape, indices_shape),
+                   ie_device, precision, ir_version, temp_dir=temp_dir,
+                   use_legacy_frontend=use_legacy_frontend)
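Note on the new TensorScatterAdd test above: tf.raw_ops.TensorScatterAdd adds each slice of updates into the slice of tensor addressed by the corresponding row of indices (the GPU skip refers to the ScatterNDUpdate-based lowering mentioned in the skip message). A small reference sketch of these semantics on row-major data, written purely for clarity and not part of OpenVINO; all names here are hypothetical:

    // Illustrative only, not part of the patch: reference semantics of
    // TensorScatterAdd on a flattened row-major tensor.
    #include <cstddef>
    #include <vector>

    std::vector<float> tensor_scatter_add(std::vector<float> tensor,
                                          const std::vector<size_t>& shape,
                                          const std::vector<std::vector<size_t>>& indices,
                                          const std::vector<float>& updates) {
        // Each index row addresses a slice made of the trailing dims of `shape`.
        const size_t k = indices.empty() ? 0 : indices.front().size();
        size_t slice = 1;
        for (size_t d = k; d < shape.size(); ++d) {
            slice *= shape[d];
        }
        for (size_t row = 0; row < indices.size(); ++row) {
            // Row-major flat offset of the addressed slice.
            size_t offset = 0;
            for (size_t d = 0; d < k; ++d) {
                offset = offset * shape[d] + indices[row][d];
            }
            offset *= slice;
            for (size_t e = 0; e < slice; ++e) {
                tensor[offset + e] += updates[row * slice + e];
            }
        }
        return tensor;
    }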