Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU] FullyConnected: corrected BF16 config in case of f16 weights #25214

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/plugins/intel_cpu/src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,17 @@ void Graph::Replicate(const std::shared_ptr<const ov::Model>& model,
const auto parentEdge = outputNode->getParentEdgeAt(0);
const auto parent = parentEdge->getParent();
parent->setOriginalOutputPrecisionAtPort(parentEdge->getInputNum(), precToSet);
// If the parent has consumers other than the Output node, precToSet is also propagated to those consumers' inputs
// to avoid a precision mismatch (which would lead to reorder insertion and unnecessary performance overhead)
if (parent->getChildEdges().size() > 1) {
for (size_t i = 0; i < parent->getChildEdges().size(); ++i) {
const auto childEdge = parent->getChildEdgeAt(i);
// Consumers connected to the parent's other output ports must not be changed
if (childEdge->getInputNum() != parentEdge->getInputNum())
continue;
childEdge->getChild()->setOriginalInputPrecisionAtPort(childEdge->getOutputNum(), precToSet);
}
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,12 @@ struct Require {
// clang-format off
static const TypeMapping dnnlFCTypeMapping {
// {src, wei, bia, dst} pt<src, wei, bias, dst>
{{_bf16, _bf16 | _f32, _any, _bf16 | _f32}, pt(bypass(), bypass(), use<3>(), bypass())},
{{_bf16, _bf16 | _f32 | _f16, _any, _bf16 | _f32}, pt(bypass(), bypass(), use<3>(), bypass())},
{{_f16, _f16, _any, _f16 | _f32}, pt(bypass(), bypass(), use<3>(), bypass())},
// integer precision outputs are not supported for float precision inputs
{{_f32 | _bf16 | _f16, _any, _any, _i8 | _u8}, pt(bypass(), bypass(), use<0>(), use<0>())},
// compresses float weights which do not match input data precision
{{_f32, _half_float, _any, _any | _any}, pt(bypass(), bypass(), use<0>(), use<0>())},
{{_bf16, _f16, _any, _any | _any}, pt(bypass(), bypass(), use<0>(), use<0>())},
{{_f16, _bf16, _any, _any | _any}, pt(bypass(), bypass(), use<0>(), use<0>())},
// quantization configuration
// int8 inner_product does not support f16 output and bias
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,7 @@ void ConvolutionLayerCPUTest::checkBiasFusing(ov::CompiledModel& execNet) const
bool foundConv = false;
for (const auto& node : execGraph->get_ops()) {
const auto& rtInfo = node->get_rt_info();
auto getExecValue = [&rtInfo](const std::string& paramName) -> std::string {
auto it = rtInfo.find(paramName);
OPENVINO_ASSERT(rtInfo.end() != it);
return it->second.as<std::string>();
};

if (getExecValue(ov::exec_model_info::LAYER_TYPE) == "Convolution") {
if (getRuntimeValue(rtInfo, ov::exec_model_info::LAYER_TYPE) == "Convolution") {
foundConv = true;
ASSERT_EQ(3, node->inputs().size());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,8 @@ class GroupConvolutionLayerCPUTest : public testing::WithParamInterface<groupCon
bool foundConv = false;
for (const auto& node : execGraph->get_ops()) {
const auto& rtInfo = node->get_rt_info();
auto getExecValue = [&rtInfo](const std::string& paramName) -> std::string {
auto it = rtInfo.find(paramName);
OPENVINO_ASSERT(rtInfo.end() != it);
return it->second.as<std::string>();
};

if (getExecValue(ov::exec_model_info::LAYER_TYPE) == "Convolution") {
if (getRuntimeValue(rtInfo, ov::exec_model_info::LAYER_TYPE) == "Convolution") {
foundConv = true;
ASSERT_EQ(3, node->inputs().size());
break;
Expand Down Expand Up @@ -228,13 +223,8 @@ TEST_P(ExpectFallbackGroupConvolutionLayerCPUTest, CompareWithRefs) {
auto function = compiledModel.get_runtime_model();
for (const auto& node : function->get_ops()) {
const auto& rtInfo = node->get_rt_info();
auto getExecValue = [&rtInfo](const std::string& paramName) -> std::string {
auto it = rtInfo.find(paramName);
OPENVINO_ASSERT(rtInfo.end() != it);
return it->second.as<std::string>();
};
if ("Convolution" == getExecValue(ov::exec_model_info::LAYER_TYPE)) {
auto primType = getExecValue(ov::exec_model_info::IMPL_TYPE);
if ("Convolution" == getRuntimeValue(rtInfo, ov::exec_model_info::LAYER_TYPE)) {
auto primType = getRuntimeValue(rtInfo, ov::exec_model_info::IMPL_TYPE);
ASSERT_TRUE(selectedType != primType) << "primType is unexpected: " << primType;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,8 @@ void FuseTransposeAndReorderTest::check_transpose_count(size_t expectedTranspose
ASSERT_NE(nullptr, runtime_model);
size_t actual_transpose_count = 0;
for (const auto &node : runtime_model->get_ops()) {
const auto & rtInfo = node->get_rt_info();
auto getExecValue = [&rtInfo](const std::string & paramName) -> std::string {
auto it = rtInfo.find(paramName);
OPENVINO_ASSERT(rtInfo.end() != it);
return it->second.as<std::string>();
};
if (getExecValue(ov::exec_model_info::LAYER_TYPE) == "Transpose") {
const auto& rtInfo = node->get_rt_info();
if (getRuntimeValue(rtInfo, ov::exec_model_info::LAYER_TYPE) == "Transpose") {
actual_transpose_count++;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,24 +100,11 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface<MatMulDec
std::tie(inputShapes, transpose, weiElemType, additionalConfig, cpuParams) = obj.param;

std::ostringstream result;
for (const auto& shape : inputShapes) {
result << ov::test::utils::partialShape2str({shape.first}) << "_";
}
result << "TS=";
for (const auto& shape : inputShapes) {
result << "(";
if (!shape.second.empty()) {
auto itr = shape.second.begin();
do {
result << ov::test::utils::vec2str(*itr);
} while (++itr != shape.second.end() && result << "_");
}
result << ")_";
}
for (size_t i = 0; i < inputShapes.size(); ++i)
result << "IS[" << i << "]=" << inputShapes[i] << "_";
result << "transpose_a=" << transpose.first << "_";
result << "transpose_b=" << transpose.second << "_";

result << "weiLemType=" << weiElemType << "_";
result << "weiElemType=" << weiElemType << "_";

result << "config=(";
for (const auto& configEntry : additionalConfig) {
Expand All @@ -137,25 +124,6 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface<MatMulDec
std::swap(*(shape.end() - 1), *(shape.end() - 2));
}

// Walks the runtime (execution) model and, for every node whose LAYER_TYPE runtime attribute is
// "FullyConnected", asserts that the producer of its input 1 reports `expectedWeiElemType`
// in its OUTPUT_PRECISIONS runtime attribute.
// NOTE(review): input 1 is presumably the weights input (going by this method's name) - confirm.
void check_fc_weights_precision(ElementType expectedWeiElemType) const {
    // Reads a runtime-info entry as a string; the entry is required to be present.
    const auto readRtString = [](const ov::Node::RTMap& rtInfo, const std::string& key) -> std::string {
        auto it = rtInfo.find(key);
        OPENVINO_ASSERT(rtInfo.end() != it);
        return it->second.as<std::string>();
    };

    const auto execFunction = compiledModel.get_runtime_model();
    ASSERT_NE(nullptr, execFunction);
    for (const auto& node : execFunction->get_ops()) {
        const std::string layerType = readRtString(node->get_rt_info(), ov::exec_model_info::LAYER_TYPE);
        if (layerType != "FullyConnected") {
            continue;
        }

        // Precision is taken from the node feeding input 1, not from the FullyConnected node itself.
        const auto inputProducer = node->get_input_node_shared_ptr(1);
        const std::string precisionName =
            readRtString(inputProducer->get_rt_info(), ov::exec_model_info::OUTPUT_PRECISIONS);
        ov::element::Type expectedType(precisionName);
        ASSERT_EQ(expectedType, expectedWeiElemType);
    }
}

void SetUp() override {
targetDevice = ov::test::utils::DEVICE_CPU;

Expand Down Expand Up @@ -196,23 +164,23 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface<MatMulDec

configuration.insert(additionalConfig.begin(), additionalConfig.end());

ElementType netType = ElementType::f32;
ElementType convertOutType = ElementType::f32;
inType = outType = netType = ElementType::f32;
auto it = additionalConfig.find(ov::hint::inference_precision.name());
if (it != additionalConfig.end() && it->second.as<ov::element::Type>() == ov::element::bf16) {
convertOutType = inType = outType = netType = ElementType::bf16;
netType = ElementType::bf16;
weiConstElemType = (weiConstElemType != ElementType::f32) ? weiConstElemType : ElementType::bf16;
} else {
inType = outType = netType;
// Reorder between parameter and FullyConnected
// Note: reorder between FC and Result is not needed since FC primitive supports f32 output natively
reorderCount++;
}

std::string cpuNodeType = "FullyConnected";
selectedType = makeSelectedTypeStr(selectedType, outType);
selectedType = makeSelectedTypeStr(selectedType, netType);

ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(inType, inShapeA)};
std::shared_ptr<ov::Node> inputB = ov::test::utils::make_constant(weiConstElemType, inShapeB.get_shape());
if (weiConstElemType == ElementType::f16 || weiConstElemType == ElementType::bf16) {
inputB = std::make_shared<ov::op::v0::Convert>(inputB, convertOutType);
if (weiConstElemType != inType) {
inputB = std::make_shared<ov::op::v0::Convert>(inputB, inType);
mark_as_decompression(inputB);
}
expectedWeiConstElemType = weiConstElemType;
Expand All @@ -223,18 +191,22 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface<MatMulDec
}

virtual void check_execution_graph() {
CheckNodePrecisionsWithType(compiledModel, "FullyConnected", {netType, expectedWeiConstElemType, ov::element::undefined}, {outType});
CheckPluginRelatedResults(compiledModel, "FullyConnected");
CheckNumberOfNodesWithType(compiledModel, "FullyConnected", fullyConnectedCount);
CheckNumberOfNodesWithType(compiledModel, "Transpose", transposeCount);
CheckNumberOfNodesWithType(compiledModel, "Convert", convertCount);
CheckNumberOfNodesWithType(compiledModel, "Reorder", 0);
check_fc_weights_precision(expectedWeiConstElemType);
// Note: Convert node might be converted to Subgraph
CheckNumberOfNodesWithType(compiledModel, "Subgraph", 0);
CheckNumberOfNodesWithType(compiledModel, "Reorder", reorderCount);
}

size_t fullyConnectedCount = 1;
size_t transposeCount = 0;
size_t convertCount = 0;
size_t reorderCount = 0;
ElementType expectedWeiConstElemType = ElementType::f32;
ElementType netType = ElementType::f32;
};

TEST_P(MatMulDecompressConvertTest, CompareWithRefs) {
Expand Down Expand Up @@ -266,11 +238,9 @@ const std::vector<std::vector<InputShape>> inputShapes3D = {
{{{-1, -1, -1}, {{1, 2, 3}, {1, 5, 3}}}, {{1, 3, 4}, {{1, 3, 4}, {1, 3, 4}}}},
};

ov::AnyMap emptyConfig = {/* empty config */};

std::vector<ov::AnyMap> filter_additional_config_bf16() {
std::vector<ov::AnyMap> additionalConfig;
if (ov::with_cpu_x86_avx512_core()) {
if (ov::with_cpu_x86_bfloat16()) {
additionalConfig.push_back({{ov::hint::inference_precision(ov::element::bf16)}});
}
return additionalConfig;
Expand Down Expand Up @@ -307,7 +277,7 @@ std::vector<CPUSpecificParams> filter_specific_params_bf16() {
const auto testParams2D_FP32_smoke = ::testing::Combine(::testing::ValuesIn(inputShapes2D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32),
::testing::Values(emptyConfig),
::testing::Values(CPUTestUtils::empty_plugin_config),
::testing::ValuesIn(filter_specific_params(true)));

INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP32,
Expand All @@ -318,7 +288,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP32,
const auto testParams2D_smoke = ::testing::Combine(::testing::ValuesIn(inputShapes2D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f16, ElementType::bf16),
::testing::Values(emptyConfig),
::testing::Values(CPUTestUtils::empty_plugin_config),
::testing::ValuesIn(filter_specific_params(false)));

INSTANTIATE_TEST_SUITE_P(smoke_FC_2D,
Expand All @@ -340,7 +310,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16,
const auto testParams3D_FP32_smoke = ::testing::Combine(::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32),
::testing::Values(emptyConfig),
::testing::Values(CPUTestUtils::empty_plugin_config),
::testing::ValuesIn(filter_specific_params(true)));

INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP32,
Expand All @@ -351,7 +321,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP32,
const auto testParams3D_smoke = ::testing::Combine(::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f16, ElementType::bf16),
::testing::Values(emptyConfig),
::testing::Values(CPUTestUtils::empty_plugin_config),
::testing::ValuesIn(filter_specific_params(false)));

INSTANTIATE_TEST_SUITE_P(smoke_FC_3D,
Expand Down Expand Up @@ -460,26 +430,26 @@ class MatMulDecompressConvertTest2 : public MatMulDecompressConvertTest {

configuration.insert(additionalConfig.begin(), additionalConfig.end());

ElementType netType = ElementType::f32;
ElementType convertOutType = ElementType::f32;
inType = outType = netType = ElementType::f32;
auto it = additionalConfig.find(ov::hint::inference_precision.name());
if (it != additionalConfig.end() && it->second.as<ov::element::Type>() == ov::element::bf16) {
convertOutType = inType = outType = netType = ElementType::bf16;
netType = ElementType::bf16;
weiConstElemType = (weiConstElemType != ElementType::f32) ? weiConstElemType : ElementType::bf16;
} else {
inType = outType = netType;
// Reorder between parameter and FullyConnected
// Note: reorder between FC and Result is not needed since FC primitive supports f32 output natively
reorderCount++;
}

std::string cpuNodeType = "FullyConnected";
selectedType = makeSelectedTypeStr(selectedType, outType);
selectedType = makeSelectedTypeStr(selectedType, netType);

ov::ParameterVector params;
for (auto&& shape : {inShapeFC0, inShapeFC1}) {
params.push_back(std::make_shared<ov::op::v0::Parameter>(inType, shape));
}
std::shared_ptr<ov::Node> inputWeights = ov::test::utils::make_constant(weiConstElemType, inShapeWeights.get_shape());
if (weiConstElemType == ElementType::f16) {
inputWeights = std::make_shared<ov::op::v0::Convert>(inputWeights, convertOutType);
if (weiConstElemType != inType) {
inputWeights = std::make_shared<ov::op::v0::Convert>(inputWeights, inType);
mark_as_decompression(inputWeights);
}
expectedWeiConstElemType = weiConstElemType;
Expand All @@ -505,7 +475,7 @@ const auto testParams2D_FP16_2_smoke =
::testing::Combine(::testing::Values(static_shapes_to_test_representation({{2, 3}, {2, 3}, {3, 4}})),
::testing::Values(std::pair<bool, bool>{false, true}),
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::Values(CPUTestUtils::empty_plugin_config),
::testing::ValuesIn(filter_specific_params(false)));

INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16_2,
Expand Down Expand Up @@ -644,7 +614,7 @@ const auto testParams2D_FP16_3_smoke =
::testing::Combine(::testing::Values(static_shapes_to_test_representation({{1, 16, 32}, {32, 64}})),
::testing::Values(std::pair<bool, bool>{false, false}),
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::Values(CPUTestUtils::empty_plugin_config),
::testing::ValuesIn(filter_specific_params(false)));

INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16_3,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,9 @@ class MatmulBrgemmInt8Test : public testing::WithParamInterface<MatmulBrgemmInt8
void check_node(std::shared_ptr<const ov::Model> function, const std::string& nodeName) {
ASSERT_NE(nullptr, function);
for (const auto &node : function->get_ops()) {
const auto & rtInfo = node->get_rt_info();
auto getExecValue = [&rtInfo](const std::string & paramName) -> std::string {
auto it = rtInfo.find(paramName);
OPENVINO_ASSERT(rtInfo.end() != it);
return it->second.as<std::string>();
};
const auto& rtInfo = node->get_rt_info();
if (node->get_friendly_name() == nodeName) {
auto primType = getExecValue(ov::exec_model_info::IMPL_TYPE);
auto primType = getRuntimeValue(rtInfo, ov::exec_model_info::IMPL_TYPE);
ASSERT_TRUE(primTypeCheck(primType)) << "primType is unexpected: " << primType << " Expected: " << selectedType;
ASSERT_EQ(node->get_output_element_type(0), outType);
ASSERT_EQ(node->get_input_element_type(0), inType);
Expand Down
Loading
Loading