Skip to content

Commit

Permalink
Refactor MVN
Browse files Browse the repository at this point in the history
  • Loading branch information
allnes committed Sep 23, 2024
1 parent f00ac41 commit d40159d
Show file tree
Hide file tree
Showing 14 changed files with 574 additions and 186 deletions.
85 changes: 85 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn_new.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_mvn_new.hpp"
#include "acl_utils.hpp"

namespace ov {
namespace intel_cpu {

bool ACLMVNExecutor::supports(const MVNConfig &config) {
    const auto& attrs = config.attrs;

    // The ACL layer cannot express the OUTSIDE_SQRT epsilon placement.
    if (attrs.epsMode_ == MVNEpsMode::OUTSIDE_SQRT) {
        DEBUG_LOG("NEMeanStdDevNormalizationLayer does not support OUTSIDE_SQRT mode");
        return false;
    }

    // Mean-only MVN (no variance normalization) is not expressible either.
    if (!attrs.normalizeVariance_) {
        DEBUG_LOG("NEMeanStdDevNormalizationLayer supports normalize_variance=true only");
        return false;
    }

    // Per-channel statistics are rejected for NHWC sources.
    if (!attrs.initAcrossChannels_ && attrs.srcIsNHWC) {
        DEBUG_LOG("initAcrossChannels = false is not supported by ACL for NHWC layout");
        return false;
    }

    return true;
}

void ACLMVNExecutor::updateTensorsShapes(ACLShapes& aclMemoryShapes) {
    auto& srcShape = aclMemoryShapes[ACLArgs::ACL_SRC_0];
    auto& dstShape = aclMemoryShapes[ACLArgs::ACL_DST];

    // Mirror the source dimensions: ACL keeps shapes in reversed order
    // relative to how they arrive here.
    arm_compute::TensorShape reversed;
    const size_t numDims = srcShape.num_dimensions();
    for (size_t idx = 0; idx < numDims; idx++) {
        reversed.set(idx, srcShape[numDims - idx - 1]);
    }

    // Collapse the N-D shape into a 2D (width, height) view for
    // NEMeanStdDevNormalizationLayer: across-channels keeps the first
    // reversed dimension as height and folds the rest into width;
    // per-channel folds the first two into height and the rest into width.
    size_t width = 1;
    size_t height = 1;
    const size_t dims = reversed.num_dimensions();
    if (aclMVNAtrrs.initAcrossChannels_) {
        if (dims >= 2u) {
            height = reversed[0];
            width = reversed[1];
            for (size_t idx = 2; idx < dims; idx++) {
                width *= reversed[idx];
            }
        } else {
            height = 1;
            width = reversed[0];
        }
    } else {
        if (dims > 2u) {
            height = reversed[0] * reversed[1];
            width = reversed[2];
            for (size_t idx = 3; idx < dims; idx++) {
                width *= reversed[idx];
            }
        } else if (dims == 2u) {
            height = reversed[0] * reversed[1];
            width = 1;
        } else {
            height = reversed[0];
            width = 1;
        }
    }

    // Both tensors adopt the same collapsed 2D shape.
    srcShape.set(0, width);
    srcShape.set(1, height);
    dstShape.set(0, width);
    dstShape.set(1, height);
}

// Delegates validation to ACL, which checks the collapsed 2D src/dst
// tensor infos for the mean-stddev normalization kernel.
arm_compute::Status ACLMVNExecutor::validateTensorsInfo(const ACLInfos &aclMemoryInfos) {
    const auto& srcInfo = aclMemoryInfos[ACLArgs::ACL_SRC_0];
    const auto& dstInfo = aclMemoryInfos[ACLArgs::ACL_DST];
    return arm_compute::NEMeanStdDevNormalizationLayer::validate(srcInfo.get(),
                                                                 dstInfo.get(),
                                                                 aclMVNAtrrs.epsValue_);
}

// Creates and configures the ACL kernel bound to the prepared src/dst tensors.
ACLFunction ACLMVNExecutor::configureFunction(const ACLTensors & aclMemoryTensors) {
    auto function = std::make_unique<arm_compute::NEMeanStdDevNormalizationLayer>();
    function->configure(aclMemoryTensors[ACLArgs::ACL_SRC_0].get(),
                        aclMemoryTensors[ACLArgs::ACL_DST].get(),
                        aclMVNAtrrs.epsValue_);
    return function;
}

} // namespace intel_cpu
} // namespace ov
34 changes: 34 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn_new.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "acl_common_executor.hpp"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {

// MVN executor backed by Arm Compute Library's NEMeanStdDevNormalizationLayer.
// Only INSIDE_SQRT epsilon mode with variance normalization is supported
// (see supports()).
class ACLMVNExecutor : public ACLCommonExecutor {
public:
    // Only the MVN attributes are consumed; postOps, memory and context are
    // accepted to match the common executor construction signature and are
    // intentionally unused here.
    // NOTE(review): member name "aclMVNAtrrs" looks like a typo for
    // "aclMVNAttrs"; renaming requires touching acl_mvn_new.cpp as well.
    ACLMVNExecutor(const MVNAttrs& attrs,
                   const PostOps& postOps,
                   const MemoryArgs& memory,
                   const ExecutorContext::CPtr context) : aclMVNAtrrs(attrs) {}

    // Reports whether the given MVN configuration maps onto the ACL layer.
    static bool supports(const MVNConfig& config);

    // Collapses the N-D src/dst shapes into the 2D view the ACL layer expects.
    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override;

    // Runs ACL-side validation on the prepared tensor infos.
    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;

    // Creates and configures the ACL function object.
    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;

private:
    // Copy of the operation attributes captured at construction time.
    MVNAttrs aclMVNAtrrs;
};

using ACLMVNExecutorPtr = std::shared_ptr<ACLMVNExecutor>;
} // namespace intel_cpu
} // namespace ov
120 changes: 120 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/common/ref_mvn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ref_mvn.hpp"
#include "openvino/core/parallel.hpp"

// Resolves the raw src/dst buffers from the memory arguments and runs the
// scalar reference MVN kernel over the stored 5D shape.
void ov::intel_cpu::RefMVNExecutor::execute(const ov::intel_cpu::MemoryArgs &memory) {
    auto* srcPtr = reinterpret_cast<uint8_t *>(memory.at(ARG_SRC_0)->getData());
    auto* dstPtr = reinterpret_cast<uint8_t *>(memory.at(ARG_DST)->getData());
    mvn_ref(srcPtr, dstPtr, refMVNAttrs.shape5D);
}

// The reference path has no execution-time state to prepare, so the update
// step is a no-op that always reports success.
bool ov::intel_cpu::RefMVNExecutor::update(const ov::intel_cpu::MemoryArgs &memory) {
    return true;
}

// The scalar reference implementation accepts any MVN configuration and thus
// acts as the universal fallback.
bool ov::intel_cpu::RefMVNExecutor::supports(const ov::intel_cpu::MVNConfig& config) {
    return true;
}

// Scalar reference MVN kernel. Interprets both buffers as f32 laid out as the
// flattened 5D shape [N, C, D, H, W]: subtracts the mean and, when
// normalizeVariance_ is set, multiplies by the inverse (epsilon-adjusted)
// standard deviation. Statistics span the whole batch item when
// execAcrossChannels_ is set, otherwise each channel independently.
void ov::intel_cpu::RefMVNExecutor::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const VectorDims& shape5d) {
    // Buffers are treated as f32; the caller must ensure src/dst precision
    // matches this assumption.
    const float *src_data_ptr = reinterpret_cast<const float *>(src_data);
    float *dst_data_ptr = reinterpret_cast<float *>(dst_data);
    const size_t N = shape5d[0];
    const size_t C = shape5d[1];
    const size_t D = shape5d[2];
    const size_t H = shape5d[3];
    const size_t W = shape5d[4];

    // Flat strides: C1 = one spatial plane, C2 = one channel volume,
    // C3 = one whole batch item.
    size_t C1 = H * W;
    size_t C2 = C1 * D;
    size_t C3 = C2 * C;

    // NOTE(review): parallel_sum/parallel_for calls below are nested inside
    // this outer parallel_for — assumes the OV threading runtime handles
    // nested parallel sections; confirm.
    parallel_for(N, [&](int b) {
        size_t cb = b * C3;  // flat offset of batch item b
        if (refMVNAttrs.execAcrossChannels_) {
            // Parallel sum for each channel for mean
            float C3inv = 1.f / static_cast<float>(C3);
            float mean_temp = 0.0f;

            mean_temp = parallel_sum(C, mean_temp, [&](size_t c)->float {
                float mean_internal = 0.0f;
                size_t cc = cb + c * C2;
                for (size_t sp = 0lu; sp < C2; sp++) {
                    mean_internal += src_data_ptr[cc + sp];
                }
                return mean_internal;
            });

            float mean = mean_temp * C3inv;

            if (refMVNAttrs.normalizeVariance_) {
                // parallel sum for each channel for variance
                float variance_temp = 0.0f;
                variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float {
                    float variance_internal = 0.0f;
                    size_t cc = cb + c * C2;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        variance_internal += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean);
                    }
                    return variance_internal;
                });

                // Inverse stddev; epsilon placement depends on epsMode_:
                // INSIDE_SQRT:  1 / sqrt(var + eps)
                // OUTSIDE_SQRT: 1 / (sqrt(var) + eps)
                float variance = 1.f;
                if (refMVNAttrs.epsMode_ == INSIDE_SQRT)
                    variance = 1.f / sqrtf(variance_temp * C3inv + refMVNAttrs.epsValue_);
                else if (refMVNAttrs.epsMode_ == OUTSIDE_SQRT)
                    variance = 1.f / (sqrtf(variance_temp * C3inv) + refMVNAttrs.epsValue_);

                // Normalize: zero mean, scaled by inverse stddev.
                parallel_for(C, [&](int c) {
                    size_t cc = cb + c * C2;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance;
                    }
                });
            } else {
                // Mean subtraction only.
                parallel_for(C, [&](int c) {
                    size_t cc = cb + c * C2;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean;
                    }
                });
            }
        } else { // per channel
            float C2inv = 1.f / static_cast<float>(C2);
            parallel_for(C, [&](size_t c) {
                // mean for this channel
                float mean = 0.f;
                size_t cc = cb + c * C2;
                for (size_t sp = 0lu; sp < C2; sp++) {
                    mean += src_data_ptr[cc + sp];
                }
                mean *= C2inv;

                if (refMVNAttrs.normalizeVariance_) {
                    // variance for this channel
                    float variance = 0.f;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        variance += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean);
                    }

                    // Inverse stddev; epsilon placement depends on epsMode_.
                    if (refMVNAttrs.epsMode_ == INSIDE_SQRT)
                        variance = 1.f / sqrtf(variance * C2inv + refMVNAttrs.epsValue_);
                    else if (refMVNAttrs.epsMode_ == OUTSIDE_SQRT)
                        variance = 1.f / (sqrtf(variance * C2inv) + refMVNAttrs.epsValue_);

                    // mvn for this channel
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance;
                    }
                } else {
                    // mvn for this channel
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean;
                    }
                }
            });
        }
    });
}
37 changes: 37 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/common/ref_mvn.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once

#include <memory>
#include "cpu_memory.h"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {

// Scalar reference MVN executor — the universal fallback backend.
class RefMVNExecutor : public Executor {
public:
    // Only the MVN attributes are consumed; postOps, memory and context are
    // accepted to match the common executor construction signature and are
    // intentionally unused here.
    RefMVNExecutor(const MVNAttrs& attrs,
                   const PostOps& postOps,
                   const MemoryArgs& memory,
                   const ExecutorContext::CPtr context) : refMVNAttrs(attrs) {}

    // Runs the scalar reference kernel on the SRC_0/DST memory arguments.
    void execute(const MemoryArgs& memory) override;

    impl_desc_type implType() const override {
        return impl_desc_type::ref;
    }

    // offloads execution data preparation from the exec call
    bool update(const MemoryArgs& memory) override;

    // The reference executor accepts any MVN configuration.
    static bool supports(const MVNConfig& config);

private:
    // Stored by value (bug fix): the previous `const MVNAttrs&` member bound
    // to the constructor parameter and would dangle once the caller's attrs
    // object went out of scope. This also matches ACLMVNExecutor, which
    // copies its attributes.
    MVNAttrs refMVNAttrs;

    // Scalar MVN over the flattened [N, C, D, H, W] shape; treats buffers as f32.
    void mvn_ref(const uint8_t *in_ptr_, uint8_t *out_ptr_, const VectorDims& shape5d);
};

} // namespace intel_cpu
} // namespace ov
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/nodes/executors/executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ enum class ExecutorType {
enum class OperationType {
FullyConnected,
MatMul,
Convolution
Convolution,
MVN
};

std::string ExecutorTypeToString(const ExecutorType type);
Expand Down
5 changes: 5 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/implementations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "nodes/executors/executor_implementation.hpp"
#include "nodes/executors/fullyconnected_config.hpp"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {
Expand All @@ -26,6 +27,10 @@ const std::vector<ExecutorImplementation<Attrs>>& getImplementations() {
template <>
const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations();

// MVN
template <>
const std::vector<ExecutorImplementation<MVNAttrs>>& getImplementations();

// ...

} // namespace intel_cpu
Expand Down
24 changes: 1 addition & 23 deletions src/plugins/intel_cpu/src/nodes/executors/mvn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,11 @@
#include "cpu_memory.h"
#include "onednn/iml_type_mapper.h"
#include "executor.hpp"
#include "mvn_config.hpp"

namespace ov {
namespace intel_cpu {

enum MVNLayoutType {
mvn_planar,
mvn_block,
mvn_by_channel
};

// Defines way to add epsilon: inside sqrt or outside.
enum MVNEpsMode {
INSIDE_SQRT,
OUTSIDE_SQRT
};

struct MVNAttrs {
MVNLayoutType layout = mvn_planar;
bool initAcrossChannels_ = false;
bool execAcrossChannels_ = false;
bool normalizeVariance_ = false;
float epsValue_ = 0.0f;
MVNEpsMode epsMode_ = INSIDE_SQRT;
ov::element::Type src_prc;
ov::element::Type dst_prc;
};

class MVNExecutor {
public:
MVNExecutor(const ExecutorContext::CPtr context);
Expand Down
42 changes: 42 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/mvn_config.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/type/element_type.hpp"
#include "cpu_memory.h"
#include "executor_config.hpp"

namespace ov {
namespace intel_cpu {

// Memory layout variant an MVN implementation is specialized for.
enum MVNLayoutType {
    mvn_planar,
    mvn_block,
    mvn_by_channel
};

// Defines way to add epsilon: inside sqrt or outside.
enum MVNEpsMode {
    INSIDE_SQRT,   // normalization factor = 1 / sqrt(variance + eps)
    OUTSIDE_SQRT   // normalization factor = 1 / (sqrt(variance) + eps)
};

// Aggregated MVN operation attributes shared by all executor backends.
struct MVNAttrs {
    MVNLayoutType layout = mvn_planar;
    bool initAcrossChannels_ = false;   // across-channels setting consulted when checking backend support
    bool execAcrossChannels_ = false;   // across-channels setting consulted by the execution kernel
    bool normalizeVariance_ = false;    // also divide by stddev, not just subtract the mean
    float epsValue_ = 0.0f;             // epsilon applied according to epsMode_
    MVNEpsMode epsMode_ = INSIDE_SQRT;
    ov::element::Type src_prc;          // source precision
    ov::element::Type dst_prc;          // destination precision
    VectorDims shape5D = {0, 0, 0, 0, 0};  // input shape normalized to [N, C, D, H, W]
    bool srcIsNHWC = false;             // true when the source layout is NHWC
};

// Executor configuration specialization carrying the MVN attributes.
using MVNConfig = executor::Config<MVNAttrs>;

} // namespace intel_cpu
} // namespace ov
Loading

0 comments on commit d40159d

Please sign in to comment.