Skip to content

Commit

Permalink
Refactor MVN
Browse files Browse the repository at this point in the history
  • Loading branch information
allnes committed Sep 23, 2024
1 parent f00ac41 commit d40159d
Show file tree
Hide file tree
Showing 14 changed files with 574 additions and 186 deletions.
85 changes: 85 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn_new.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_mvn_new.hpp"
#include "acl_utils.hpp"

namespace ov {
namespace intel_cpu {

bool ACLMVNExecutor::supports(const MVNConfig &config) {
    const auto& attrs = config.attrs;

    // The ACL layer cannot express the OUTSIDE_SQRT epsilon placement.
    if (attrs.epsMode_ == MVNEpsMode::OUTSIDE_SQRT) {
        DEBUG_LOG("NEMeanStdDevNormalizationLayer does not support OUTSIDE_SQRT mode");
        return false;
    }

    // Mean-only MVN (no variance normalization) is not expressible either.
    if (!attrs.normalizeVariance_) {
        DEBUG_LOG("NEMeanStdDevNormalizationLayer supports normalize_variance=true only");
        return false;
    }

    // Per-channel statistics are rejected for NHWC sources.
    if (!attrs.initAcrossChannels_ && attrs.srcIsNHWC) {
        DEBUG_LOG("initAcrossChannels = false is not supported by ACL for NHWC layout");
        return false;
    }

    return true;
}

void ACLMVNExecutor::updateTensorsShapes(ACLShapes& aclMemoryShapes) {
    auto& srcShape = aclMemoryShapes[ACLArgs::ACL_SRC_0];
    auto& dstShape = aclMemoryShapes[ACLArgs::ACL_DST];

    // Mirror the source dimensions: ACL keeps shapes in reversed order
    // relative to how they arrive here.
    arm_compute::TensorShape reversed;
    const size_t numDims = srcShape.num_dimensions();
    for (size_t idx = 0; idx < numDims; idx++) {
        reversed.set(idx, srcShape[numDims - idx - 1]);
    }

    // Collapse the N-D shape into a 2D (width, height) view for
    // NEMeanStdDevNormalizationLayer: across-channels keeps the first
    // reversed dimension as height and folds the rest into width;
    // per-channel folds the first two into height and the rest into width.
    size_t width = 1;
    size_t height = 1;
    const size_t dims = reversed.num_dimensions();
    if (aclMVNAtrrs.initAcrossChannels_) {
        if (dims >= 2u) {
            height = reversed[0];
            width = reversed[1];
            for (size_t idx = 2; idx < dims; idx++) {
                width *= reversed[idx];
            }
        } else {
            height = 1;
            width = reversed[0];
        }
    } else {
        if (dims > 2u) {
            height = reversed[0] * reversed[1];
            width = reversed[2];
            for (size_t idx = 3; idx < dims; idx++) {
                width *= reversed[idx];
            }
        } else if (dims == 2u) {
            height = reversed[0] * reversed[1];
            width = 1;
        } else {
            height = reversed[0];
            width = 1;
        }
    }

    // Both tensors adopt the same collapsed 2D shape.
    srcShape.set(0, width);
    srcShape.set(1, height);
    dstShape.set(0, width);
    dstShape.set(1, height);
}

// Delegates validation to ACL, which checks the collapsed 2D src/dst
// tensor infos for the mean-stddev normalization kernel.
arm_compute::Status ACLMVNExecutor::validateTensorsInfo(const ACLInfos &aclMemoryInfos) {
    const auto& srcInfo = aclMemoryInfos[ACLArgs::ACL_SRC_0];
    const auto& dstInfo = aclMemoryInfos[ACLArgs::ACL_DST];
    return arm_compute::NEMeanStdDevNormalizationLayer::validate(srcInfo.get(),
                                                                 dstInfo.get(),
                                                                 aclMVNAtrrs.epsValue_);
}

// Creates and configures the ACL kernel bound to the prepared src/dst tensors.
ACLFunction ACLMVNExecutor::configureFunction(const ACLTensors & aclMemoryTensors) {
    auto function = std::make_unique<arm_compute::NEMeanStdDevNormalizationLayer>();
    function->configure(aclMemoryTensors[ACLArgs::ACL_SRC_0].get(),
                        aclMemoryTensors[ACLArgs::ACL_DST].get(),
                        aclMVNAtrrs.epsValue_);
    return function;
}

} // namespace intel_cpu
} // namespace ov
34 changes: 34 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn_new.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "acl_common_executor.hpp"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {

// MVN executor backed by Arm Compute Library's NEMeanStdDevNormalizationLayer.
// Only INSIDE_SQRT epsilon mode with variance normalization is supported
// (see supports()).
class ACLMVNExecutor : public ACLCommonExecutor {
public:
    // Only the MVN attributes are consumed; postOps, memory and context are
    // accepted to match the common executor construction signature and are
    // intentionally unused here.
    // NOTE(review): member name "aclMVNAtrrs" looks like a typo for
    // "aclMVNAttrs"; renaming requires touching acl_mvn_new.cpp as well.
    ACLMVNExecutor(const MVNAttrs& attrs,
                   const PostOps& postOps,
                   const MemoryArgs& memory,
                   const ExecutorContext::CPtr context) : aclMVNAtrrs(attrs) {}

    // Reports whether the given MVN configuration maps onto the ACL layer.
    static bool supports(const MVNConfig& config);

    // Collapses the N-D src/dst shapes into the 2D view the ACL layer expects.
    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override;

    // Runs ACL-side validation on the prepared tensor infos.
    arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;

    // Creates and configures the ACL function object.
    ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;

private:
    // Copy of the operation attributes captured at construction time.
    MVNAttrs aclMVNAtrrs;
};

using ACLMVNExecutorPtr = std::shared_ptr<ACLMVNExecutor>;
} // namespace intel_cpu
} // namespace ov
120 changes: 120 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/common/ref_mvn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ref_mvn.hpp"
#include "openvino/core/parallel.hpp"

// Resolves the raw src/dst buffers from the memory arguments and runs the
// scalar reference MVN kernel over the stored 5D shape.
void ov::intel_cpu::RefMVNExecutor::execute(const ov::intel_cpu::MemoryArgs &memory) {
    auto* srcPtr = reinterpret_cast<uint8_t *>(memory.at(ARG_SRC_0)->getData());
    auto* dstPtr = reinterpret_cast<uint8_t *>(memory.at(ARG_DST)->getData());
    mvn_ref(srcPtr, dstPtr, refMVNAttrs.shape5D);
}

// The reference path has no execution-time state to prepare, so the update
// step is a no-op that always reports success.
bool ov::intel_cpu::RefMVNExecutor::update(const ov::intel_cpu::MemoryArgs &memory) {
    return true;
}

// The scalar reference implementation accepts any MVN configuration and thus
// acts as the universal fallback.
bool ov::intel_cpu::RefMVNExecutor::supports(const ov::intel_cpu::MVNConfig& config) {
    return true;
}

// Scalar reference MVN kernel. Interprets both buffers as f32 laid out as the
// flattened 5D shape [N, C, D, H, W]: subtracts the mean and, when
// normalizeVariance_ is set, multiplies by the inverse (epsilon-adjusted)
// standard deviation. Statistics span the whole batch item when
// execAcrossChannels_ is set, otherwise each channel independently.
void ov::intel_cpu::RefMVNExecutor::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const VectorDims& shape5d) {
    // Buffers are treated as f32; the caller must ensure src/dst precision
    // matches this assumption.
    const float *src_data_ptr = reinterpret_cast<const float *>(src_data);
    float *dst_data_ptr = reinterpret_cast<float *>(dst_data);
    const size_t N = shape5d[0];
    const size_t C = shape5d[1];
    const size_t D = shape5d[2];
    const size_t H = shape5d[3];
    const size_t W = shape5d[4];

    // Flat strides: C1 = one spatial plane, C2 = one channel volume,
    // C3 = one whole batch item.
    size_t C1 = H * W;
    size_t C2 = C1 * D;
    size_t C3 = C2 * C;

    // NOTE(review): parallel_sum/parallel_for calls below are nested inside
    // this outer parallel_for — assumes the OV threading runtime handles
    // nested parallel sections; confirm.
    parallel_for(N, [&](int b) {
        size_t cb = b * C3;  // flat offset of batch item b
        if (refMVNAttrs.execAcrossChannels_) {
            // Parallel sum for each channel for mean
            float C3inv = 1.f / static_cast<float>(C3);
            float mean_temp = 0.0f;

            mean_temp = parallel_sum(C, mean_temp, [&](size_t c)->float {
                float mean_internal = 0.0f;
                size_t cc = cb + c * C2;
                for (size_t sp = 0lu; sp < C2; sp++) {
                    mean_internal += src_data_ptr[cc + sp];
                }
                return mean_internal;
            });

            float mean = mean_temp * C3inv;

            if (refMVNAttrs.normalizeVariance_) {
                // parallel sum for each channel for variance
                float variance_temp = 0.0f;
                variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float {
                    float variance_internal = 0.0f;
                    size_t cc = cb + c * C2;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        variance_internal += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean);
                    }
                    return variance_internal;
                });

                // Inverse stddev; epsilon placement depends on epsMode_:
                // INSIDE_SQRT:  1 / sqrt(var + eps)
                // OUTSIDE_SQRT: 1 / (sqrt(var) + eps)
                float variance = 1.f;
                if (refMVNAttrs.epsMode_ == INSIDE_SQRT)
                    variance = 1.f / sqrtf(variance_temp * C3inv + refMVNAttrs.epsValue_);
                else if (refMVNAttrs.epsMode_ == OUTSIDE_SQRT)
                    variance = 1.f / (sqrtf(variance_temp * C3inv) + refMVNAttrs.epsValue_);

                // Normalize: zero mean, scaled by inverse stddev.
                parallel_for(C, [&](int c) {
                    size_t cc = cb + c * C2;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance;
                    }
                });
            } else {
                // Mean subtraction only.
                parallel_for(C, [&](int c) {
                    size_t cc = cb + c * C2;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean;
                    }
                });
            }
        } else { // per channel
            float C2inv = 1.f / static_cast<float>(C2);
            parallel_for(C, [&](size_t c) {
                // mean for this channel
                float mean = 0.f;
                size_t cc = cb + c * C2;
                for (size_t sp = 0lu; sp < C2; sp++) {
                    mean += src_data_ptr[cc + sp];
                }
                mean *= C2inv;

                if (refMVNAttrs.normalizeVariance_) {
                    // variance for this channel
                    float variance = 0.f;
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        variance += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean);
                    }

                    // Inverse stddev; epsilon placement depends on epsMode_.
                    if (refMVNAttrs.epsMode_ == INSIDE_SQRT)
                        variance = 1.f / sqrtf(variance * C2inv + refMVNAttrs.epsValue_);
                    else if (refMVNAttrs.epsMode_ == OUTSIDE_SQRT)
                        variance = 1.f / (sqrtf(variance * C2inv) + refMVNAttrs.epsValue_);

                    // mvn for this channel
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance;
                    }
                } else {
                    // mvn for this channel
                    for (size_t sp = 0lu; sp < C2; sp++) {
                        dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean;
                    }
                }
            });
        }
    });
}
37 changes: 37 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/common/ref_mvn.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once

#include <memory>
#include "cpu_memory.h"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {

// Scalar reference MVN executor — the universal fallback backend.
class RefMVNExecutor : public Executor {
public:
    // Only the MVN attributes are consumed; postOps, memory and context are
    // accepted to match the common executor construction signature and are
    // intentionally unused here.
    RefMVNExecutor(const MVNAttrs& attrs,
                   const PostOps& postOps,
                   const MemoryArgs& memory,
                   const ExecutorContext::CPtr context) : refMVNAttrs(attrs) {}

    // Runs the scalar reference kernel on the SRC_0/DST memory arguments.
    void execute(const MemoryArgs& memory) override;

    impl_desc_type implType() const override {
        return impl_desc_type::ref;
    }

    // offloads execution data preparation from the exec call
    bool update(const MemoryArgs& memory) override;

    // The reference executor accepts any MVN configuration.
    static bool supports(const MVNConfig& config);

private:
    // Stored by value (bug fix): the previous `const MVNAttrs&` member bound
    // to the constructor parameter and would dangle once the caller's attrs
    // object went out of scope. This also matches ACLMVNExecutor, which
    // copies its attributes.
    MVNAttrs refMVNAttrs;

    // Scalar MVN over the flattened [N, C, D, H, W] shape; treats buffers as f32.
    void mvn_ref(const uint8_t *in_ptr_, uint8_t *out_ptr_, const VectorDims& shape5d);
};

} // namespace intel_cpu
} // namespace ov
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/nodes/executors/executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ enum class ExecutorType {
enum class OperationType {
FullyConnected,
MatMul,
Convolution
Convolution,
MVN
};

std::string ExecutorTypeToString(const ExecutorType type);
Expand Down
5 changes: 5 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/implementations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "nodes/executors/executor_implementation.hpp"
#include "nodes/executors/fullyconnected_config.hpp"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {
Expand All @@ -26,6 +27,10 @@ const std::vector<ExecutorImplementation<Attrs>>& getImplementations() {
template <>
const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations();

// MVN
template <>
const std::vector<ExecutorImplementation<MVNAttrs>>& getImplementations();

// ...

} // namespace intel_cpu
Expand Down
24 changes: 1 addition & 23 deletions src/plugins/intel_cpu/src/nodes/executors/mvn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,11 @@
#include "cpu_memory.h"
#include "onednn/iml_type_mapper.h"
#include "executor.hpp"
#include "mvn_config.hpp"

namespace ov {
namespace intel_cpu {

enum MVNLayoutType {
mvn_planar,
mvn_block,
mvn_by_channel
};

// Defines way to add epsilon: inside sqrt or outside.
enum MVNEpsMode {
INSIDE_SQRT,
OUTSIDE_SQRT
};

struct MVNAttrs {
MVNLayoutType layout = mvn_planar;
bool initAcrossChannels_ = false;
bool execAcrossChannels_ = false;
bool normalizeVariance_ = false;
float epsValue_ = 0.0f;
MVNEpsMode epsMode_ = INSIDE_SQRT;
ov::element::Type src_prc;
ov::element::Type dst_prc;
};

class MVNExecutor {
public:
MVNExecutor(const ExecutorContext::CPtr context);
Expand Down
42 changes: 42 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/mvn_config.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/type/element_type.hpp"
#include "cpu_memory.h"
#include "executor_config.hpp"

namespace ov {
namespace intel_cpu {

// Memory layout variant an MVN implementation is specialized for.
enum MVNLayoutType {
    mvn_planar,
    mvn_block,
    mvn_by_channel
};

// Defines way to add epsilon: inside sqrt or outside.
enum MVNEpsMode {
    INSIDE_SQRT,   // normalization factor = 1 / sqrt(variance + eps)
    OUTSIDE_SQRT   // normalization factor = 1 / (sqrt(variance) + eps)
};

// Aggregated MVN operation attributes shared by all executor backends.
struct MVNAttrs {
    MVNLayoutType layout = mvn_planar;
    bool initAcrossChannels_ = false;   // across-channels setting consulted when checking backend support
    bool execAcrossChannels_ = false;   // across-channels setting consulted by the execution kernel
    bool normalizeVariance_ = false;    // also divide by stddev, not just subtract the mean
    float epsValue_ = 0.0f;             // epsilon applied according to epsMode_
    MVNEpsMode epsMode_ = INSIDE_SQRT;
    ov::element::Type src_prc;          // source precision
    ov::element::Type dst_prc;          // destination precision
    VectorDims shape5D = {0, 0, 0, 0, 0};  // input shape normalized to [N, C, D, H, W]
    bool srcIsNHWC = false;             // true when the source layout is NHWC
};

// Executor configuration specialization carrying the MVN attributes.
using MVNConfig = executor::Config<MVNAttrs>;

} // namespace intel_cpu
} // namespace ov
Loading

0 comments on commit d40159d

Please sign in to comment.