Merge branch 'master' into as/npuw_s11n_fixes

openvinotoolkit · Jan 15, 2025 · 74d439d · 74d439d
2 parents 3f7a027 + 2f5af17
commit 74d439d
Show file tree

Hide file tree

Showing 155 changed files with 2,859 additions and 528 deletions.
diff --git a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp
@@ -18,6 +18,7 @@ namespace device {
 
 class ConvertOpSet1ToDeviceSpecific: public ov::pass::ModelPass {
 public:
+    OPENVINO_MODEL_PASS_RTTI("ConvertOpSet1ToDeviceSpecific");
     bool run_on_model(const std::shared_ptr<ov::Model>& f) override {
         return true;
     }
@@ -96,7 +97,7 @@ if (useLpt) {
 
     // Low precision transformations plugin specific configuration: transformation callbacks definition
     lptManager.get_pass_config()->set_callback<MarkupPrecisions>([](const std::shared_ptr<const ov::Node>& node) -> bool {
-        if (const auto multiply = std::dynamic_pointer_cast<const ov::opset1::Multiply>(node)) {
+        if (const auto multiply = ov::as_type_ptr<const ov::opset1::Multiply>(node)) {
             return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
         }
         return false;

diff --git a/docs/articles_en/assets/snippets/ov_model_snippets.cpp b/docs/articles_en/assets/snippets/ov_model_snippets.cpp
@@ -217,7 +217,7 @@ return true;
 // ! [ov:replace_node]
 bool ov_replace_node(std::shared_ptr<ov::Node> node) {
     // Step 1. Verify that node is of type ov::op::v0::Negative
-    auto neg = std::dynamic_pointer_cast<ov::op::v0::Negative>(node);
+    auto neg = ov::as_type_ptr<ov::op::v0::Negative>(node);
     if (!neg) {
         return false;
     }
@@ -238,7 +238,7 @@ bool ov_replace_node(std::shared_ptr<ov::Node> node) {
 // ! [ov:replace_node]
 
 bool ov_manual_replace_node(std::shared_ptr<ov::Node> node) {
-auto neg = std::dynamic_pointer_cast<ov::op::v0::Negative>(node);
+auto neg = ov::as_type_ptr<ov::op::v0::Negative>(node);
 if (!neg) {
     return false;
 }

diff --git a/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp b/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp
@@ -99,7 +99,7 @@ void replace_non_reshapable_const() {
     for (const auto& node : model->get_ops()) {
         // Trying to find the problematic Constant by name.
         if (node->get_friendly_name() == "name_of_non_reshapable_const") {
-            auto const_with_hardcoded_shape = std::dynamic_pointer_cast<ov::opset8::Constant>(node);
+            auto const_with_hardcoded_shape = ov::as_type_ptr<ov::opset8::Constant>(node);
             // Replacing the problematic Constant with a new one. Do this for all the problematic Constants in the network, then
             // you can apply the reshape feature.
             ov::replace_node(const_with_hardcoded_shape, new_const);

diff --git a/docs/articles_en/assets/snippets/template_model_transformation.hpp b/docs/articles_en/assets/snippets/template_model_transformation.hpp
@@ -18,7 +18,7 @@ class MyModelTransformation;
 // template_model_transformation.hpp
 class ov::pass::MyModelTransformation : public ov::pass::ModelPass {
 public:
-    OPENVINO_RTTI("MyModelTransformation", "0");
+    OPENVINO_MODEL_PASS_RTTI("MyModelTransformation");  // RTTI declared through the ModelPass-specific macro (type name only)
     bool run_on_model(const std::shared_ptr<ov::Model>& f) override;
 };
 // ! [model_pass:template_transformation_hpp]
diff --git a/docs/articles_en/assets/snippets/template_pattern_transformation.cpp b/docs/articles_en/assets/snippets/template_pattern_transformation.cpp
@@ -23,7 +23,7 @@ ov::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() {
     auto div = std::make_shared<ov::opset3::Divide>(input0, input1);
 
     ov::matcher_pass_callback callback = [](pattern::Matcher& m) {
-        auto div = std::dynamic_pointer_cast<ov::opset3::Divide>(m.get_match_root());
+        auto div = ov::as_type_ptr<ov::opset3::Divide>(m.get_match_root());
         // We can not apply this transformation in case with integer input data type
         if (!div || div->input(0).get_element_type().is_integral()) {
             return false;

diff --git a/docs/articles_en/assets/snippets/template_pattern_transformation.hpp b/docs/articles_en/assets/snippets/template_pattern_transformation.hpp
@@ -23,13 +23,13 @@ class ReluReluFusionMatcher;
  */
 class ov::pass::DecomposeDivideMatcher : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("DecomposeDivideMatcher", "0");
+    OPENVINO_MATCHER_PASS_RTTI("DecomposeDivideMatcher");  // RTTI declared through the MatcherPass-specific macro (type name only)
     DecomposeDivideMatcher();
 };
 // ! [graph_rewrite:template_transformation_hpp]
 
 class ov::pass::ReluReluFusionMatcher : public ov::pass::MatcherPass {
 public:
-    OPENVINO_RTTI("ReluReluFusionMatcher", "0");
+    OPENVINO_MATCHER_PASS_RTTI("ReluReluFusionMatcher");  // RTTI declared through the MatcherPass-specific macro (type name only)
     ReluReluFusionMatcher();
 };
diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp
@@ -118,35 +118,48 @@ const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) {
 
 namespace string_helpers {
 
+namespace {  // anonymous namespace: the helper below is local to this translation unit
+const char* find_last_not_null(const char* str, size_t length) {  // Returns one past the last non-'\0' char in [str, str + length); returns str if there is none.
+    return std::find_if(std::make_reverse_iterator(str + length),  // scan backwards, starting from the end of the buffer
+                        std::make_reverse_iterator(str),
+                        [](const auto& c) {
+                            return c != '\0';
+                        })
+        .base();  // base() turns the reverse iterator into the forward position just after the matched char
+}
+}  // namespace
+
 py::array bytes_array_from_tensor(ov::Tensor&& t) {
     if (t.get_element_type() != ov::element::string) {
         OPENVINO_THROW("Tensor's type must be a string!");
     }
     auto data = t.data<std::string>();
+    // The result uses dtype "|S<N>", where N is the length of the longest string held by the tensor.
     auto max_element = std::max_element(data, data + t.get_size(), [](const std::string& x, const std::string& y) {
         return x.length() < y.length();
     });
     auto max_stride = max_element->length();
     auto dtype = py::dtype("|S" + std::to_string(max_stride));
+    // NOTE(review): max_element is dereferenced above without a t.get_size() > 0 check - confirm empty tensors never reach here.
     // Adjusting strides to follow the numpy convention:
-    py::array array;
-    auto new_strides = t.get_strides();
-    if (new_strides.size() == 0) {
-        array = py::array(dtype, t.get_shape(), {});
-    } else {
-        auto element_stride = new_strides[new_strides.size() - 1];
-        for (size_t i = 0; i < new_strides.size(); ++i) {
-            new_strides[i] = (new_strides[i] / element_stride) * max_stride;
+    const auto py_array_strides = [&t, &max_stride] {
+        auto new_strides = t.get_strides();
+        if (!new_strides.empty()) {  // empty strides are returned unchanged
+            const auto& element_stride = new_strides.back();  // NOTE(review): aliases the last element, which the loop overwrites on its final iteration
+            for (auto&& stride : new_strides) {
+                stride = (stride / element_stride) * max_stride;  // rescale: multiples of the last stride become multiples of max_stride
+            }
         }
-        array = py::array(dtype, t.get_shape(), new_strides);
-    }
+        return new_strides;
+    };
+    // NOTE(review): the loop below advances ptr by max_stride per element and does not use the strides computed above - assumes a dense layout; confirm.
     // Create an empty array and populate it with utf-8 encoded strings:
-    auto ptr = array.data();
+    auto array = py::array(dtype, t.get_shape(), py_array_strides());
+    auto ptr = reinterpret_cast<char*>(array.mutable_data());  // write cursor into the array buffer
     for (size_t i = 0; i < t.get_size(); ++i) {
-        auto start = &data[i][0];
-        auto length = data[i].length();
-        auto end = std::copy(start, start + length, (char*)ptr + i * max_stride);
-        std::fill_n(end, max_stride - length, 0);
+        const auto length = data[i].length();
+        ptr = std::copy_n(data[i].begin(), length, ptr);  // copy_n returns the output position just past the copied bytes
+        ptr = std::fill_n(ptr, max_stride - length, '\0');  // pad the rest of the slot with '\0' up to max_stride bytes
     }
     return array;
 }
@@ -169,14 +182,6 @@ py::array string_array_from_tensor(ov::Tensor&& t) {
     return array;
 }
 
-static const char* find_first_not_null(const char* ptr, size_t itemsize) {
-    auto rbegin = std::make_reverse_iterator(ptr + itemsize);
-    auto first_not_null = std::find_if(rbegin, std::make_reverse_iterator(ptr), [](const auto& c) {
-        return c != '\0';
-    });
-    return first_not_null.base();
-}
-
 void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array) {
     if (tensor.get_size() != static_cast<size_t>(array.size())) {
         OPENVINO_THROW("Passed array must have the same size (number of elements) as the Tensor!");
@@ -185,7 +190,7 @@ void fill_tensor_from_bytes(ov::Tensor& tensor, py::array& array) {
     auto data = tensor.data<std::string>();
     for (size_t i = 0; i < tensor.get_size(); ++i) {
         const char* ptr = reinterpret_cast<const char*>(buf.ptr) + (i * buf.itemsize);
-        auto first_not_null = find_first_not_null(ptr, buf.itemsize);
+        auto first_not_null = find_last_not_null(ptr, buf.itemsize);
         data[i] = std::string(ptr, first_not_null);
     }
 }
@@ -194,18 +199,20 @@ void fill_tensor_from_strings(ov::Tensor& tensor, py::array& array) {
     if (tensor.get_size() != static_cast<size_t>(array.size())) {
         OPENVINO_THROW("Passed array must have the same size (number of elements) as the Tensor!");
     }
-    py::buffer_info buf = array.request();
+
+    const py::buffer_info buf = array.request();
     auto data = tensor.data<std::string>();
-    for (size_t i = 0; i < tensor.get_size(); ++i) {
-        char* ptr = reinterpret_cast<char*>(buf.ptr) + (i * buf.itemsize);
+
+    for (auto a_first = reinterpret_cast<const uint8_t*>(buf.ptr), a_last = a_first + array.nbytes(); a_first < a_last;
+         a_first += array.itemsize(), ++data) {
         // TODO: check other unicode kinds? 2BYTE and 1BYTE?
-        PyObject* _unicode_obj =
-            PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, reinterpret_cast<void*>(ptr), buf.itemsize / 4);
-        PyObject* _utf8_obj = PyUnicode_AsUTF8String(_unicode_obj);
-        const char* _tmp_str = PyBytes_AsString(_utf8_obj);
-        data[i] = std::string(_tmp_str);
+        auto _unicode_obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, a_first, array.itemsize() / 4);
+
+        Py_ssize_t utf8_size = 0;
+        const auto utf8_str = PyUnicode_AsUTF8AndSize(_unicode_obj, &utf8_size);
+
+        *data = std::string(utf8_str, find_last_not_null(utf8_str, utf8_size));
         Py_XDECREF(_unicode_obj);
-        Py_XDECREF(_utf8_obj);
     }
 }
 

diff --git a/src/bindings/python/tests/test_runtime/test_tensor_string.py b/src/bindings/python/tests/test_runtime/test_tensor_string.py
@@ -75,9 +75,15 @@ def test_empty_string_tensor(init_type):
         (["text", "abc", "openvino"]),
         (["text", "больше текста", "jeszcze więcej słów", "효과가 있었어"]),
         ([["text"], ["abc"], ["openvino"]]),
-        ([["jeszcze więcej słów", "효과가 있었어"]]),
-    ],
-)
+        ([["text"]]),
+        (["tex\u0000t\u0000tt"]),
+        ([["abĆ"]]),
+        ([["tex\u0000tttt"], ["abĆ"]]),
+        ([["jeszcze więcej słówe"], [u"효#과가 있었어"]]),
+        ([["jeszcze\u0000 więcej słówekó"]]),
+        ([["효과가 있었어"]]),
+        (["ab\u0000Ć"]),
+    ])
 def test_init_with_list(string_data):
     tensor = ov.Tensor(string_data)
     assert tensor.element_type == ov.Type.string
@@ -90,6 +96,25 @@ def test_init_with_list(string_data):
     check_string_based(tensor, _string_data)
 
 
+def test_init_with_list_rare_real_scenario():  # input strings carry embedded and trailing null characters
+    input_data = ["tex\u0000\u0000ttt\u0000\u0000", "ab\u0000Ć"]
+    tensor = ov.Tensor(input_data)
+    assert tensor.element_type == ov.Type.string
+    # Convert to numpy to perform all checks. Memory is not shared.
+    np_string_data = np.array(input_data)
+    # Encoded:
+    check_bytes_based(tensor, np_string_data)
+    # Decoded:
+    str_tensor_data = tensor.str_data
+    assert str_tensor_data.shape == np_string_data.shape
+    # Case where OV is not aligned with the numpy format:
+    # strides differ because trailing null characters are not stored in the tensor.
+    # Trailing null characters in a string are rarely of any use.
+    assert str_tensor_data.strides != np_string_data.strides
+    assert np.array_equal(str_tensor_data, np_string_data)
+    assert not (np.shares_memory(str_tensor_data, np_string_data))
+
+
 @pytest.mark.parametrize(
     ("string_data"),
     [