From 9c65ba2c1ff792d31dac0456a11ef0bf5cf11139 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Tue, 8 Oct 2024 15:33:27 +0200 Subject: [PATCH] [Snippets] Tokenization helper cleanup and tokenization tests reenabling (#26843) ### Details: - *Stub Convert-related logic is removed from the tokenization helper, since it is no longer relevant* - *Skipped tokenization tests are re-enabled and their reference builders are fixed* ### Tickets: - *CVS-142098* - *CVS-114607* - *CVS-114336* --- .../snippets/src/utils/tokenization_utils.cpp | 41 +-------- .../tests/src/pass/collapse_subgraph.cpp | 22 ++--- .../tests/src/pass/mha_tokenization.cpp | 20 +---- .../snippets/arm/convert.cpp | 4 +- .../snippets/x64/convert.cpp | 4 +- .../x64/snipptes_mark_skipped.cpp | 10 +-- .../include/subgraph_converts.hpp | 7 +- .../src/subgraph_convert.cpp | 76 +++------------- .../ov_snippets_models/src/subgraph_mha.cpp | 90 +++++++++---------- .../src/subgraph_simple.cpp | 16 ++-- 10 files changed, 82 insertions(+), 208 deletions(-) diff --git a/src/common/snippets/src/utils/tokenization_utils.cpp b/src/common/snippets/src/utils/tokenization_utils.cpp index 700b282f86f4d4..a5f925b5ab7374 100644 --- a/src/common/snippets/src/utils/tokenization_utils.cpp +++ b/src/common/snippets/src/utils/tokenization_utils.cpp @@ -227,44 +227,9 @@ bool tokenize_node(const std::shared_ptr& node, const SnippetsTokeniza // this is there stitching happens, get result of a copy of a body of currently processed input and put it to the new inputs // internal output index == external output index - auto& input_body = clones[input_node]; - size_t source_output_index = input_value.get_index(); - auto source_result = input_body->get_results()[source_output_index]; - - // We cannot add new node, that is not Convert, after Convert (that is start node) to avoid arithmetic problems with conversion - // We can add any new node in Subgraph after Convert (bacause after Input) - // Parameter - // | - // Convert - // - // We cannot add 
new node, that isn't Convert, in Subgraph after existing Convert - // Parameter - // Relu - // Convert - // - // But we can add new Convert in Subgraph after existing Convert - // Parameter - // Relu - // Convert - // Convert - // - // Thus, We can grow subgraph only if Convert is the first node of subgraph and have to abort it's the last one and we want to add not Convert - // We have this limitation because at the moment we support only one execution precision inside body, so - // if there is Convert with input and output data types that aren't equal to supported exec type, - // we can get conversion math errors - const auto output_of_subgraph = source_result->get_input_node_shared_ptr(0); - if (!ov::is_type(node) && ov::is_type(output_of_subgraph)) { - // Also we can add new node after < Parameter -> Convert -> Convert -> Convert > - auto grandparent = output_of_subgraph->get_input_node_ptr(0); - while (ov::is_type(grandparent)) { - grandparent = grandparent->get_input_node_ptr(0); - } - - if (!ov::is_type(grandparent)) { - return abort("Convert supports only as Input and as Result of subgraph. 
Aborting"); - } - } - // Result op has a single input + const auto& input_body = clones[input_node]; + const size_t source_output_index = input_value.get_index(); + const auto& source_result = input_body->get_results()[source_output_index]; internal_inputs.push_back(source_result->input_value(0)); } else { // We need some non-scalar constants inside Subgraph in the following cases: diff --git a/src/common/snippets/tests/src/pass/collapse_subgraph.cpp b/src/common/snippets/tests/src/pass/collapse_subgraph.cpp index c3459e1d2c6b2a..57ba33903f6197 100644 --- a/src/common/snippets/tests/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/tests/src/pass/collapse_subgraph.cpp @@ -27,22 +27,14 @@ void CollapseSubgraphTests::run() { }); } -class SKIP_CollapseSubgraphTests : public CollapseSubgraphTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_Eltwise) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_Eltwise) { const auto& f = EltwiseFunction(std::vector {{2, 3}, {1, 3}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_MatMulWithEltwise) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_MatMulWithEltwise) { const auto& f = MatMulEltwiseBranchesFunction(std::vector {{1, 3, 4, 4}, {1, 3, 4, 4}}); model = f.getOriginal(); model_ref = f.getReference(); @@ -56,35 +48,35 @@ TEST_F(CollapseSubgraphTests, smoke_Snippets_AvoidLoopEltwise) { run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_OneConvert) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_OneConvert) { const auto& f = ConvertFunction(std::vector{{2, 5}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertInput) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertInput) { const auto& f = 
ConvertInputFunction(std::vector{{2, 5}, {1, 5}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertOutput) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertOutput) { const auto& f = ConvertOutputFunction(std::vector{{2, 5}, {1, 5}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertStub) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertStub) { const auto& f = ConvertStubFunction(std::vector{{2, 5, 2}, {1, 5, 1}}); model = f.getOriginal(); model_ref = f.getReference(); run(); } -TEST_F(SKIP_CollapseSubgraphTests /* CVS-114607 */, smoke_Snippets_ConvertPartialInputsAndResults) { +TEST_F(CollapseSubgraphTests, smoke_Snippets_ConvertPartialInputsAndResults) { const auto& f = ConvertPartialInputsAndResultsFunction(std::vector{{2, 5, 1}, {1, 5, 1}, {2, 1, 10}}, std::vector{ov::element::i8, ov::element::bf16, ov::element::f32}, std::vector{ov::element::f32, ov::element::i8}); diff --git a/src/common/snippets/tests/src/pass/mha_tokenization.cpp b/src/common/snippets/tests/src/pass/mha_tokenization.cpp index 3a97b94925a2c0..c5932ed690d670 100644 --- a/src/common/snippets/tests/src/pass/mha_tokenization.cpp +++ b/src/common/snippets/tests/src/pass/mha_tokenization.cpp @@ -14,14 +14,6 @@ namespace ov { namespace test { namespace snippets { -class SKIP_TokenizeMHASnippetsTests : public TokenizeMHASnippetsTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - void TokenizeMHASnippetsTests::run() { ASSERT_TRUE(model); manager.register_pass(); @@ -103,8 +95,7 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_with_MatMul0_Transpose) { run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_with_MatMul0_Transpose_Dynamic) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, 
smoke_Snippets_MHA_with_MatMul0_Transpose_Dynamic) { const auto &f = MHAMatMul0TransposeFunction(std::vector{{-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), false); @@ -113,8 +104,7 @@ TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_with_M run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-114607 */, smoke_Snippets_MHA_with_int_Matmuls) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_with_int_Matmuls) { const auto &f = MHAINT8MatMulTypeRelaxedFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}); model = f.getOriginal(); model_ref = f.getReference(); @@ -128,8 +118,7 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Transpose_extraction) { run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_Dynamic_Transpose_extraction) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Dynamic_Transpose_extraction) { const auto& f = MHATransposedInputFunction(std::vector{{-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}}, true); model = f.getOriginal(); model_ref = f.getReference(); @@ -144,8 +133,7 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Transpose_extraction_and_uns run(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-142098 */, smoke_Snippets_MHA_Dynamic_Transpose_extraction_and_unsupported_existing_transpose) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Dynamic_Transpose_extraction_and_unsupported_existing_transpose) { const auto& f = MHATransposedInputFunction(std::vector{{-1, -1, -1, -1}, {-1, -1, -1, -1}, {-1, -1, -1, -1}}, true, std::vector{0, 3, 1, 2}); model = f.getOriginal(); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp index 
1230034b778437..4d1877f5d23ffe 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/arm/convert.cpp @@ -95,8 +95,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub, ::testing::Combine( ::testing::ValuesIn(inputShapes_ConvertInput), ::testing::ValuesIn(types_ConvertStub), - ::testing::Values(2), - ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU)), Convert::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp index 842d6a5b453d09..590135c15750d3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/x64/convert.cpp @@ -104,8 +104,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub, ::testing::Combine( ::testing::ValuesIn(inputShapes_ConvertInput), ::testing::ValuesIn(types_ConvertInput), - ::testing::Values(2), - ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU)), Convert::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp index 027386f0ad050d..f4ae354b6fe4e0 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/snipptes_mark_skipped.cpp @@ -30,15 +30,7 @@ class SnippetsMarkSkippedTests : public TransformationTestsF { } }; -class SKIP_SnippetsMarkSkippedTests : public SnippetsMarkSkippedTests { -public: - void SetUp() override { - 
GTEST_SKIP(); - } - void TearDown() override{}; -}; - -TEST_F(SKIP_SnippetsMarkSkippedTests /* CVS-114336 */, smoke_Snippets_SkipAfterInputsMatMulEltwise) { +TEST_F(SnippetsMarkSkippedTests, smoke_Snippets_SkipAfterInputsMatMulEltwise) { const auto &f = MatMulEltwiseBranchesFunction(std::vector {{1, 3, 4, 4}, {1, 3, 4, 4}}); model = f.getOriginal(); // Fully tokenizable, since inputs are followed by MatMul diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp index 84a4be181cf3d6..6c36034301d223 100644 --- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp +++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_converts.hpp @@ -81,12 +81,11 @@ class ConvertOutputFunction : public SnippetsFunctionBase { }; -/// There are 2 subgraphs: Add + Convert(Stub) and Relu /// Tokenized simply by starting subgraph. // in1 in2 in1 in2 -// Add Subgraph -// Convert -> | -// Relu Subgraph +// Add | +// Convert -> Subgraph +// Relu | // Result Result class ConvertStubFunction : public SnippetsFunctionBase { public: diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp index 00143f9f623f4e..1324d380294502 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_convert.cpp @@ -24,10 +24,7 @@ std::shared_ptr ConvertFunction::initOriginal() const { } std::shared_ptr ConvertFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{std::make_shared(indata0, outType)}, - ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return std::make_shared(NodeVector{subgraph}, 
ParameterVector{data0}); } @@ -41,13 +38,7 @@ std::shared_ptr ConvertInputFunction::initOriginal() const { std::shared_ptr ConvertInputFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); auto data1 = std::make_shared(outType, input_shapes[1]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto indata1 = std::make_shared(outType, data1->get_shape()); - auto convert = std::make_shared(indata0, outType); - auto subgraph = std::make_shared(NodeVector{data0, data1}, - std::make_shared( - NodeVector{std::make_shared(convert, indata1)}, - ParameterVector{indata0, indata1})); + auto subgraph = std::make_shared(NodeVector{data0, data1}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0, data1}); } @@ -61,14 +52,7 @@ std::shared_ptr ConvertOutputFunction::initOriginal() const { std::shared_ptr ConvertOutputFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); auto data1 = std::make_shared(inType, input_shapes[1]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto indata1 = std::make_shared(inType, data1->get_shape()); - auto add = std::make_shared(indata0, indata1); - auto convert = std::make_shared(add, outType); - auto subgraph = std::make_shared(NodeVector{data0, data1}, - std::make_shared( - NodeVector{convert}, - ParameterVector{indata0, indata1})); + auto subgraph = std::make_shared(NodeVector{data0, data1}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0, data1}); } @@ -83,17 +67,8 @@ std::shared_ptr ConvertStubFunction::initOriginal() const { std::shared_ptr ConvertStubFunction::initReference() const { auto data0 = std::make_shared(inType, input_shapes[0]); auto data1 = std::make_shared(inType, input_shapes[1]); - auto indata0 = std::make_shared(inType, data0->get_shape()); - auto indata1 = std::make_shared(inType, data1->get_shape()); - auto add = std::make_shared(indata0, indata1); - 
auto convert = std::make_shared(add, outType); - auto subgraph0 = std::make_shared( - NodeVector{data0, data1}, std::make_shared(NodeVector{convert}, ParameterVector{indata0, indata1})); - auto indata2 = std::make_shared(convert->get_destination_type(), convert->get_shape()); - auto relu = std::make_shared(indata2); - auto subgraph1 = std::make_shared( - NodeVector{subgraph0}, std::make_shared(NodeVector{relu}, ParameterVector{indata2})); - return std::make_shared(NodeVector{subgraph1}, ParameterVector{data0, data1}); + auto subgraph = std::make_shared(NodeVector{data0, data1}, getOriginal()); + return std::make_shared(NodeVector{subgraph}, ParameterVector{data0, data1}); } std::shared_ptr ConvertPartialInputsAndResultsFunction::initOriginal() const { @@ -116,12 +91,12 @@ std::shared_ptr ConvertPartialInputsAndResultsFunction::initReference auto indata0 = std::make_shared(inTypes[0], data0->get_shape()); auto indata1 = std::make_shared(inTypes[1], data1->get_shape()); auto indata2 = std::make_shared(inTypes[2], data2->get_shape()); - auto convert0 = std::make_shared(indata0, outTypes[0]); - auto convert1 = std::make_shared(indata1, outTypes[0]); + auto convert0 = std::make_shared(indata0, outTypes[0]); + auto convert1 = std::make_shared(indata1, outTypes[0]); auto add = std::make_shared(convert0, convert1); auto relu = std::make_shared(add); auto sub = std::make_shared(relu, indata2); - auto convert2 = std::make_shared(relu, outTypes[1]); + auto convert2 = std::make_shared(relu, outTypes[1]); auto subgraph = std::make_shared( NodeVector{data0, data1, data2}, std::make_shared(NodeVector{sub, convert2}, ParameterVector{indata0, indata1, indata2})); auto stub3 = createRollAsStub(subgraph); @@ -141,15 +116,7 @@ std::shared_ptr ConvertManyOnInputsFunction::initOriginal() const { } std::shared_ptr ConvertManyOnInputsFunction::initReference() const { auto data0 = std::make_shared(types[0], input_shapes[0]); - auto indata0 = std::make_shared(types[0], data0->get_shape()); 
- std::shared_ptr out = indata0; - for (auto i = 1; i < types.size(); i++) { - auto convert = std::make_shared(out, types[i]); - out = convert; - } - auto relu = std::make_shared(out); - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{relu}, ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0}); } @@ -165,15 +132,7 @@ std::shared_ptr ConvertManyOnOutputsFunction::initOriginal() const { } std::shared_ptr ConvertManyOnOutputsFunction::initReference() const { auto data0 = std::make_shared(types[0], input_shapes[0]); - auto indata0 = std::make_shared(types[0], data0->get_shape()); - auto relu = std::make_shared(indata0); - std::shared_ptr out = relu; - for (auto i = 1; i < types.size(); i++) { - auto convert = std::make_shared(out, types[i]); - out = convert; - } - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{out}, ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return std::make_shared(NodeVector{subgraph}, ParameterVector{data0}); } @@ -194,20 +153,7 @@ std::shared_ptr ConvertManyOnInputOutputFunction::initOriginal() cons } std::shared_ptr ConvertManyOnInputOutputFunction::initReference() const { auto data0 = std::make_shared(inTypes[0], input_shapes[0]); - auto indata0 = std::make_shared(inTypes[0], data0->get_shape()); - std::shared_ptr out = indata0; - for (auto i = 1; i < inTypes.size(); i++) { - auto convert = std::make_shared(out, inTypes[i]); - out = convert; - } - auto relu = std::make_shared(data0); - out = relu; - for (auto i = 0; i < outTypes.size(); i++) { - auto convert = std::make_shared(out, outTypes[i]); - out = convert; - } - auto subgraph = std::make_shared(NodeVector{data0}, - std::make_shared(NodeVector{out}, ParameterVector{indata0})); + auto subgraph = std::make_shared(NodeVector{data0}, getOriginal()); return 
std::make_shared(NodeVector{subgraph}, ParameterVector{data0}); } } // namespace snippets diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp index c47cb754b5b891..1dbf8d7d22ed26 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp @@ -393,8 +393,8 @@ std::shared_ptr MHASelectFunction::initOriginal() const { // Value is equal to '1' - to avoid situation e^(-1000) / (sum(e^(-1000)) = 0/0 = NAN auto selectConst = ov::op::v0::Constant::create(precisions[2], ov::Shape{1}, std::vector{1}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); @@ -531,8 +531,8 @@ std::shared_ptr MHAWOTransposeOnInputsFunction::initOriginal() const auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape({4}), std::vector{0, 2, 1, 3}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto mulConst = ov::test::utils::make_constant(precision, ov::Shape({1})); const auto mul = std::make_shared(param1, mulConst); const auto matMul0 = std::make_shared(param0, mul, transA, transB); @@ -550,8 +550,8 @@ std::shared_ptr MHAWOTransposeFunction::initOriginal() const { auto param2 = std::make_shared(precisions[2], input_shapes[2]); ov::ParameterVector ngraphParam = {param0, param1, param2}; - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto matMul0 = std::make_shared(param0, param1, transA, transB); const auto softmax = std::make_shared(matMul0, -1); const auto matMul1 = std::make_shared(softmax, param2, transA, transB); @@ -615,8 +615,8 
@@ std::shared_ptr MHAFQAfterMatMulFunction::initOriginal() const { static_cast(input_shapes[0].get_shape()[1])}; auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); @@ -665,8 +665,8 @@ std::shared_ptr MHAINT8MatMulFunction::initOriginal() const { {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); auto fq2 = ov::test::utils::make_fake_quantize(transpose2Param, ov::element::f32, 256, {1}, {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto transpose1 = std::make_shared(fq1, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); @@ -756,8 +756,8 @@ std::shared_ptr MHAFQFunction::initOriginal() const { const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1}, {-1000}, {0}, {-1000}, {0}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto transpose1 = std::make_shared(fq1, transpose1Const); const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); @@ -806,12 +806,12 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); const auto fq_signed_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {-36912.66015625}, {36624.28125}, {-128}, {127}, ov::element::i8); 
- const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose0Param, ov::element::i8, fq_signed_params); - const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose1Param, ov::element::i8, fq_signed_params); - const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose2Param, ov::element::i8, fq_signed_params); + const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose0Param, ov::element::f32, fq_signed_params); + const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose1Param, ov::element::f32, fq_signed_params); + const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(transpose2Param, ov::element::f32, fq_signed_params); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto transpose1 = std::make_shared(fq1, transpose1Const); const auto matMul0 = std::make_shared>( @@ -820,7 +820,7 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons ov::op::TemporaryReplaceOutputType(transpose0, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose1, element::f32).get(), transA, transB); - const auto fq3 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul0, ov::element::i8, fq_signed_params); + const auto fq3 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul0, ov::element::f32, fq_signed_params); const auto add = std::make_shared>( std::vector{ element::f32, element::f32 }, std::vector{ element::f32 }, @@ -833,12 +833,12 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons ov::op::TemporaryReplaceOutputType(add, element::f32).get(), ov::op::TemporaryReplaceOutputType(deq, element::f32).get()); - const auto reshape0 = std::make_shared(add, reshape0Const, true); + const auto reshape0 = std::make_shared(deq_mul, reshape0Const, true); const auto softMax = std::make_shared(reshape0, 1); const 
auto reshape1 = std::make_shared(softMax, reshape1Const, true); const auto fq_unsigned_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {0}, {0.245}, {0}, {255}, ov::element::u8); - const auto fq4 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(reshape1, ov::element::u8, fq_unsigned_params); + const auto fq4 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(reshape1, ov::element::f32, fq_unsigned_params); const auto transpose2 = std::make_shared(fq2, transpose2Const); const auto matMul1 = std::make_shared>( @@ -846,7 +846,7 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initOriginal() cons std::vector{ element::f32 }, ov::op::TemporaryReplaceOutputType(fq4, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose2, element::f32).get(), transA, transB); - const auto fq5 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul1, ov::element::i8, fq_signed_params); + const auto fq5 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul1, ov::element::f32, fq_signed_params); const auto transpose3 = std::make_shared(fq5, transpose3Const); ov::ResultVector results{std::make_shared(transpose3)}; @@ -860,9 +860,9 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con ov::ParameterVector ngraphParams = {data0, data1, data2, data3}; const auto fq_signed_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {-36912.66015625}, {36624.28125}, {-128}, {127}, ov::element::i8); - const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data0, ov::element::i8, fq_signed_params); - const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data1, ov::element::i8, fq_signed_params); - const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data3, ov::element::i8, fq_signed_params); + const auto fq0 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data0, ov::element::f32, fq_signed_params); + const auto fq1 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data1, 
ov::element::f32, fq_signed_params); + const auto fq2 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(data3, ov::element::f32, fq_signed_params); NodeVector subgraph_inputs = {fq0, fq1, data2, fq2}; auto transpose0Param = std::make_shared(precision, input_shapes[0]); @@ -877,19 +877,8 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); - std::vector reshape0ConstData = {static_cast(input_shapes[0].get_shape()[0] * - input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]), - -1}; - auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData); - - std::vector reshape1ConstData = {static_cast(input_shapes[0].get_shape()[0]), - static_cast(input_shapes[0].get_shape()[2]), - static_cast(input_shapes[0].get_shape()[1]), - static_cast(input_shapes[0].get_shape()[1])}; - auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); - - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared>( @@ -898,7 +887,18 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con ov::op::TemporaryReplaceOutputType(transpose0, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose1, element::f32).get(), transA, transB); - const auto fq3 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul0, ov::element::i8, fq_signed_params); + auto decomposed_fq = + [](const ov::Output& input, const ov::element::Type& out_precision, float il, float ih, float scale) { + const auto input_low = 
ov::op::v0::Constant::create(ov::element::f32, {1}, {il}); + const auto input_high = ov::op::v0::Constant::create(ov::element::f32, {1}, {ih}); + const auto output_scale = ov::op::v0::Constant::create(ov::element::f32, {1}, {scale}); + const auto max = std::make_shared(input, input_low); + const auto min = std::make_shared(max, input_high); + const auto mul = std::make_shared(min, output_scale); + return std::make_shared(mul, out_precision); + }; + + const auto fq3 = decomposed_fq(matMul0, ov::element::i8, fq_signed_params.inputLowValues[0], fq_signed_params.inputHighValues[0], 0.00346764503f); const auto add = std::make_shared>( std::vector{ element::f32, element::f32 }, std::vector{ element::f32 }, @@ -911,12 +911,8 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con ov::op::TemporaryReplaceOutputType(add, element::f32).get(), ov::op::TemporaryReplaceOutputType(deq, element::f32).get()); - const auto reshape0 = std::make_shared(add, reshape0Const, true); - const auto softMax = std::make_shared(reshape0, 1); - const auto reshape1 = std::make_shared(softMax, reshape1Const, true); - - const auto fq_unsigned_params = ov::builder::subgraph::FakeQuantizeOnData(256, {1}, {0}, {0.245}, {0}, {255}, ov::element::u8); - const auto fq4 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(reshape1, ov::element::u8, fq_unsigned_params); + const auto softMax = std::make_shared(deq_mul, 3); + const auto fq4 = decomposed_fq(softMax, ov::element::u8, 0.f, 0.245f, 1040.81628f); const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); const auto matMul1 = std::make_shared>( @@ -924,7 +920,7 @@ std::shared_ptr MHAINT8MatMulTypeRelaxedFunction::initReference() con std::vector{ element::f32 }, ov::op::TemporaryReplaceOutputType(fq4, element::f32).get(), ov::op::TemporaryReplaceOutputType(transpose2, element::f32).get(), transA, transB); - const auto fq5 = ov::builder::subgraph::makeFakeQuantizeTypeRelaxed(matMul1, ov::element::i8, 
fq_signed_params); + const auto fq5 = decomposed_fq(matMul1, ov::element::i8, fq_signed_params.inputLowValues[0], fq_signed_params.inputHighValues[0], 0.00346764503f); auto subgraph = std::make_shared(subgraph_inputs, std::make_shared(NodeVector{fq5}, subgraph_params)); @@ -946,8 +942,8 @@ std::shared_ptr MHAMulAddFunction::initOriginal() const { auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{input_shapes[2].size()}, std::vector{0, 2, 1, 3}); auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{input_shapes[2].size()}, std::vector{0, 2, 1, 3}); - float transA = false; - float transB = false; + bool transA = false; + bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp index 67a1382b4f658a..12758a90c07652 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_simple.cpp @@ -20,11 +20,7 @@ std::shared_ptr AddFunction::initOriginal() const { std::shared_ptr AddFunction::initReference() const { auto data0 = std::make_shared(precision, input_shapes[0]); auto data1 = std::make_shared(precision, input_shapes[1]); - auto indata0 = std::make_shared(precision, data0->get_shape()); - auto indata1 = std::make_shared(precision, data1->get_shape()); - auto add = std::make_shared(NodeVector{data0, data1}, - std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, - ParameterVector{indata0, indata1})); + auto add = std::make_shared(NodeVector{data0, data1}, getOriginal()); return std::make_shared(NodeVector{add}, ParameterVector{data0, data1}); } std::shared_ptr ExpFunction::initOriginal() const { 
@@ -78,7 +74,7 @@ std::shared_ptr EltwiseFunction::initReference() const { auto indata1 = std::make_shared(precision, data1->get_shape()); auto indata2 = std::make_shared(precision, data1->get_shape()); auto add = std::make_shared(indata0, indata1); - auto sub = std::make_shared(add, const_data); + auto sub = std::make_shared(add, indata2); auto mul = std::make_shared(NodeVector{data0, data1, const_data}, std::make_shared(NodeVector{std::make_shared(add, sub)}, ParameterVector{indata0, indata1, indata2})); @@ -151,10 +147,10 @@ std::shared_ptr MatMulEltwiseBranchesFunction::initReference() const const std::vector const_values = ov::test::utils::generate_float_numbers(4, -10., 10.); // snippet inputs auto non_snippet_op = std::make_shared(sinh_1, sinh_2); - auto mul_const_1 = std::make_shared(precision, Shape{1}, const_values[0]); - auto add_const_1 = std::make_shared(precision, Shape{1}, const_values[1]); - auto mul_const_2 = std::make_shared(precision, Shape{1}, const_values[2]); - auto sub_const_2 = std::make_shared(precision, Shape{1}, const_values[3]); + auto mul_const_1 = std::make_shared(precision, Shape{1}, const_values[0]); + auto add_const_1 = std::make_shared(precision, Shape{1}, const_values[1]); + auto mul_const_2 = std::make_shared(precision, Shape{1}, const_values[2]); + auto sub_const_2 = std::make_shared(precision, Shape{1}, const_values[3]); // snippet function Shape matMulOutShape = input_shapes[0].get_shape();