Skip to content

Commit

Permalink
Refactor MVN
Browse files Browse the repository at this point in the history
  • Loading branch information
allnes committed Sep 23, 2024
1 parent f00ac41 commit 63008cf
Show file tree
Hide file tree
Showing 16 changed files with 517 additions and 399 deletions.
122 changes: 46 additions & 76 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp
Original file line number Diff line number Diff line change
@@ -1,43 +1,56 @@
// Copyright (C) 2023 Intel Corporation
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_mvn.hpp"
#include "acl_utils.hpp"

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

AclMVNExecutor::AclMVNExecutor(const ExecutorContext::CPtr context) : MVNExecutor(context) {}
bool ACLMVNExecutor::supports(const MVNConfig &config) {
if (config.attrs.epsMode_ == MVNEpsMode::OUTSIDE_SQRT) {
DEBUG_LOG("NEMeanStdDevNormalizationLayer does not support OUTSIDE_SQRT mode");
return false;
}
if (!config.attrs.normalizeVariance_) {
DEBUG_LOG("NEMeanStdDevNormalizationLayer supports normalize_variance=true only");
return false;
}
if (!config.attrs.initAcrossChannels_ && config.attrs.srcIsNHWC) {
DEBUG_LOG("initAcrossChannels = false is not supported by ACL for NHWC layout");
return false;
}
return true;
}

bool AclMVNExecutor::init(const MVNAttrs& mvnAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs,
const dnnl::primitive_attr &attr) {
auto srcDims = srcDescs[0]->getShape().getStaticDims();
auto dstDims = dstDescs[0]->getShape().getStaticDims();
void ACLMVNExecutor::updateTensorsShapes(ACLShapes& aclMemoryShapes) {
arm_compute::TensorShape srcDims;
const auto src_num_dim = aclMemoryShapes[ACLArgs::ACL_SRC_0].num_dimensions();
for (size_t i = 0; i < src_num_dim; i++) {
srcDims.set(i, aclMemoryShapes[ACLArgs::ACL_SRC_0][src_num_dim - i - 1]);
}

size_t X, Y;
if (mvnAttrs.initAcrossChannels_) {
if (srcDims.size() >= 2u) {
if (aclMVNAtrrs.initAcrossChannels_) {
if (srcDims.num_dimensions() >= 2u) {
Y = srcDims[0];
X = srcDims[1];
for (size_t i = 2; i < srcDims.size(); i++) {
for (size_t i = 2; i < srcDims.num_dimensions(); i++) {
X *= srcDims[i];
}
} else {
Y = 1;
X = srcDims[0];
}
} else {
if (srcDims.size() > 2u) {
if (srcDims.num_dimensions() > 2u) {
Y = srcDims[0] * srcDims[1];
X = srcDims[2];
for (size_t i = 3; i < srcDims.size(); i++) {
for (size_t i = 3; i < srcDims.num_dimensions(); i++) {
X *= srcDims[i];
}
} else if (srcDims.size() == 2u) {
} else if (srcDims.num_dimensions() == 2u) {
Y = srcDims[0] * srcDims[1];
X = 1;
} else {
Expand All @@ -46,70 +59,27 @@ bool AclMVNExecutor::init(const MVNAttrs& mvnAttrs,
}
}

TensorInfo srcTensorInfo = TensorInfo(TensorShape(X, Y), 1, precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
TensorInfo dstTensorInfo = TensorInfo(TensorShape(X, Y), 1, precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0]));


if (!arm_compute::NEMeanStdDevNormalizationLayer::validate(&srcTensorInfo, &dstTensorInfo, mvnAttrs.epsValue_))
return false;

srcTensor.allocator()->init(srcTensorInfo);
dstTensor.allocator()->init(dstTensorInfo);

mvn = std::make_unique<arm_compute::NEMeanStdDevNormalizationLayer>();
configureThreadSafe([&] { mvn->configure(&srcTensor, &dstTensor, mvnAttrs.epsValue_); });

return true;
aclMemoryShapes[ACLArgs::ACL_SRC_0].set(0, X);
aclMemoryShapes[ACLArgs::ACL_SRC_0].set(1, Y);
aclMemoryShapes[ACLArgs::ACL_DST].set(0, X);
aclMemoryShapes[ACLArgs::ACL_DST].set(1, Y);
}

void AclMVNExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const void *post_ops_data_) {
srcTensor.allocator()->import_memory(src[0]->getData());
dstTensor.allocator()->import_memory(dst[0]->getData());

mvn->run();

srcTensor.allocator()->free();
dstTensor.allocator()->free();
arm_compute::Status ACLMVNExecutor::validateTensorsInfo(const ACLInfos &aclMemoryInfos) {
return arm_compute::NEMeanStdDevNormalizationLayer::validate(
aclMemoryInfos[ACLArgs::ACL_SRC_0].get(),
aclMemoryInfos[ACLArgs::ACL_DST].get(),
aclMVNAtrrs.epsValue_);
}

bool AclMVNExecutorBuilder::isSupported(const MVNAttrs& mvnAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs) const {
if ((srcDescs[0]->getPrecision() != ov::element::f32 &&
srcDescs[0]->getPrecision() != ov::element::f16) ||
srcDescs[0]->getPrecision() != dstDescs[0]->getPrecision()) {
DEBUG_LOG("NEMeanStdDevNormalizationLayer does not support precisions:",
" src[0]=", srcDescs[0]->getPrecision(),
" dst[0]=", dstDescs[0]->getPrecision());
return false;
}

if (!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) &&
dstDescs[0]->hasLayoutType(LayoutType::ncsp)) &&
!(srcDescs[0]->hasLayoutType(LayoutType::nspc) &&
dstDescs[0]->hasLayoutType(LayoutType::nspc))) {
DEBUG_LOG("NEMeanStdDevNormalizationLayer does not support layout:",
" src: ", srcDescs[0]->serializeFormat(),
" dst: ", dstDescs[0]->serializeFormat());
return false;
}

if (mvnAttrs.epsMode_ == MVNEpsMode::OUTSIDE_SQRT) {
DEBUG_LOG("NEMeanStdDevNormalizationLayer does not support OUTSIDE_SQRT mode");
return false;
}
if (!mvnAttrs.normalizeVariance_) {
DEBUG_LOG("NEMeanStdDevNormalizationLayer supports normalize_variance=true only");
return false;
}
if (!mvnAttrs.initAcrossChannels_ &&
srcDescs[0]->hasLayoutType(LayoutType::nspc)) {
DEBUG_LOG("initAcrossChannels = false is not supported by ACL for NHWC layout");
return false;
}

return true;
}
ACLFunction ACLMVNExecutor::configureFunction(const ACLTensors & aclMemoryTensors) {
auto neMVN = std::make_unique<arm_compute::NEMeanStdDevNormalizationLayer>();
neMVN->configure(
aclMemoryTensors[ACLArgs::ACL_SRC_0].get(),
aclMemoryTensors[ACLArgs::ACL_DST].get(),
aclMVNAtrrs.epsValue_);
return neMVN;
}

} // namespace intel_cpu
} // namespace ov
49 changes: 16 additions & 33 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.hpp
Original file line number Diff line number Diff line change
@@ -1,51 +1,34 @@
// Copyright (C) 2023 Intel Corporation
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "acl_utils.hpp"
#include "nodes/executors/mvn.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "utils/debug_capabilities.h"
#include "acl_common_executor.hpp"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {

class AclMVNExecutor : public MVNExecutor {
class ACLMVNExecutor : public ACLCommonExecutor {
public:
AclMVNExecutor(const ExecutorContext::CPtr context);
ACLMVNExecutor(const MVNAttrs& attrs,
const PostOps& postOps,
const MemoryArgs& memory,
const ExecutorContext::CPtr context) : aclMVNAtrrs(attrs) {}

bool init(const MVNAttrs& mvnAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs,
const dnnl::primitive_attr &attr) override;
void exec(const std::vector<MemoryCPtr>& src,
const std::vector<MemoryPtr>& dst,
const void *post_ops_data_) override;
static bool supports(const MVNConfig& config);

impl_desc_type getImplType() const override {
return implType;
}
void updateTensorsShapes(ACLShapes& aclMemoryShapes) override;

private:
impl_desc_type implType = impl_desc_type::acl;
arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override;

arm_compute::Tensor srcTensor;
arm_compute::Tensor dstTensor;
std::unique_ptr<arm_compute::NEMeanStdDevNormalizationLayer> mvn = nullptr;
};
ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override;

class AclMVNExecutorBuilder : public MVNExecutorBuilder {
public:
bool isSupported(const MVNAttrs& mvnAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs) const override;

MVNExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override {
return std::make_shared<AclMVNExecutor>(context);
}
private:
MVNAttrs aclMVNAtrrs;
};

using ACLMVNExecutorPtr = std::shared_ptr<ACLMVNExecutor>;
} // namespace intel_cpu
} // namespace ov
} // namespace ov
120 changes: 120 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/common/ref_mvn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ref_mvn.hpp"
#include "openvino/core/parallel.hpp"

void ov::intel_cpu::CommonMVNExecutor::execute(const ov::intel_cpu::MemoryArgs &memory) {
mvn_ref(reinterpret_cast<uint8_t *>(memory.at(ARG_SRC_0)->getData()),
reinterpret_cast<uint8_t *>(memory.at(ARG_DST)->getData()), refMVNAttrs.shape5D);
}

bool ov::intel_cpu::CommonMVNExecutor::update(const ov::intel_cpu::MemoryArgs &memory) {
return true;
}

bool ov::intel_cpu::CommonMVNExecutor::supports(const ov::intel_cpu::MVNConfig& config) {
return true;
}

void ov::intel_cpu::CommonMVNExecutor::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const VectorDims& shape5d) {
const float *src_data_ptr = reinterpret_cast<const float *>(src_data);
float *dst_data_ptr = reinterpret_cast<float *>(dst_data);
const size_t N = shape5d[0];
const size_t C = shape5d[1];
const size_t D = shape5d[2];
const size_t H = shape5d[3];
const size_t W = shape5d[4];

size_t C1 = H * W;
size_t C2 = C1 * D;
size_t C3 = C2 * C;

parallel_for(N, [&](int b) {
size_t cb = b * C3;
if (refMVNAttrs.execAcrossChannels_) {
// Parallel sum for each channel for mean
float C3inv = 1.f / static_cast<float>(C3);
float mean_temp = 0.0f;

mean_temp = parallel_sum(C, mean_temp, [&](size_t c)->float {
float mean_internal = 0.0f;
size_t cc = cb + c * C2;
for (size_t sp = 0lu; sp < C2; sp++) {
mean_internal += src_data_ptr[cc + sp];
}
return mean_internal;
});

float mean = mean_temp * C3inv;

if (refMVNAttrs.normalizeVariance_) {
// parallel sum for each channel for variance
float variance_temp = 0.0f;
variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float {
float variance_internal = 0.0f;
size_t cc = cb + c * C2;
for (size_t sp = 0lu; sp < C2; sp++) {
variance_internal += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean);
}
return variance_internal;
});

float variance = 1.f;
if (refMVNAttrs.epsMode_ == INSIDE_SQRT)
variance = 1.f / sqrtf(variance_temp * C3inv + refMVNAttrs.epsValue_);
else if (refMVNAttrs.epsMode_ == OUTSIDE_SQRT)
variance = 1.f / (sqrtf(variance_temp * C3inv) + refMVNAttrs.epsValue_);

parallel_for(C, [&](int c) {
size_t cc = cb + c * C2;
for (size_t sp = 0lu; sp < C2; sp++) {
dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance;
}
});
} else {
parallel_for(C, [&](int c) {
size_t cc = cb + c * C2;
for (size_t sp = 0lu; sp < C2; sp++) {
dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean;
}
});
}
} else { // per channel
float C2inv = 1.f / static_cast<float>(C2);
parallel_for(C, [&](size_t c) {
// mean for this channel
float mean = 0.f;
size_t cc = cb + c * C2;
for (size_t sp = 0lu; sp < C2; sp++) {
mean += src_data_ptr[cc + sp];
}
mean *= C2inv;

if (refMVNAttrs.normalizeVariance_) {
// variance for this channel
float variance = 0.f;
for (size_t sp = 0lu; sp < C2; sp++) {
variance += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean);
}

if (refMVNAttrs.epsMode_ == INSIDE_SQRT)
variance = 1.f / sqrtf(variance * C2inv + refMVNAttrs.epsValue_);
else if (refMVNAttrs.epsMode_ == OUTSIDE_SQRT)
variance = 1.f / (sqrtf(variance * C2inv) + refMVNAttrs.epsValue_);

// mvn for this channel
for (size_t sp = 0lu; sp < C2; sp++) {
dst_data_ptr[cc + sp] = (src_data_ptr[cc + sp] - mean) * variance;
}
} else {
// mvn for this channel
for (size_t sp = 0lu; sp < C2; sp++) {
dst_data_ptr[cc + sp] = src_data_ptr[cc + sp] - mean;
}
}
});
}
});
}
37 changes: 37 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/common/ref_mvn.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once

#include <memory>
#include "cpu_memory.h"
#include "nodes/executors/mvn_config.hpp"

namespace ov {
namespace intel_cpu {

class CommonMVNExecutor : public Executor {
public:
CommonMVNExecutor(const MVNAttrs& attrs,
const PostOps& postOps,
const MemoryArgs& memory,
const ExecutorContext::CPtr context) : refMVNAttrs(attrs) {}

void execute(const MemoryArgs& memory) override;

impl_desc_type implType() const override {
return impl_desc_type::ref;
}

// offloads execution data preparation from the exec call
bool update(const MemoryArgs& memory) override;

static bool supports(const MVNConfig& config);

private:
const MVNAttrs& refMVNAttrs;
void mvn_ref(const uint8_t *in_ptr_, uint8_t *out_ptr_, const VectorDims& shape5d);
};

} // namespace intel_cpu
} // namespace ov
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/nodes/executors/executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ enum class ExecutorType {
enum class OperationType {
FullyConnected,
MatMul,
Convolution
Convolution,
MVN
};

std::string ExecutorTypeToString(const ExecutorType type);
Expand Down
Loading

0 comments on commit 63008cf

Please sign in to comment.