From f092cf2725704cc91641cb7c0e3d122e2f75fb5a Mon Sep 17 00:00:00 2001 From: Lyamin-Roman Date: Fri, 13 Dec 2024 03:46:23 +0900 Subject: [PATCH] [GPU] Review fixes 2 --- .../lora_horizontal_fusion.cpp | 38 ++- .../lora_horizontal_fusion.hpp | 69 +++++ .../lora_horizontal_fusion.cpp | 264 +++++++++++++++++- 3 files changed, 351 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.cpp index 226eb155652c10..d9059e63338876 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.cpp @@ -21,15 +21,23 @@ LoRAHorizontalFusion::LoRAHorizontalFusion() { auto is_lora_pattern = [](const std::shared_ptr& node) { #define check(node) if (!node) return false; - const auto& add = std::dynamic_pointer_cast(node); check(add) - const auto& matmul2 = std::dynamic_pointer_cast(add->get_input_node_shared_ptr(0)) ? - std::dynamic_pointer_cast(add->get_input_node_shared_ptr(0)) : - std::dynamic_pointer_cast(add->get_input_node_shared_ptr(1)); check(matmul2) - const auto& multiply = std::dynamic_pointer_cast(matmul2->get_input_node_shared_ptr(0)); check(multiply) - const auto& variable_b = std::dynamic_pointer_cast(matmul2->get_input_node_shared_ptr(1)); check(variable_b) - const auto& matmul1 = std::dynamic_pointer_cast(multiply->get_input_node_shared_ptr(0)); check(matmul1) - const auto& variable_alpha = std::dynamic_pointer_cast(multiply->get_input_node_shared_ptr(1)); check(variable_alpha) - const auto& variable_a = std::dynamic_pointer_cast(matmul1->get_input_node_shared_ptr(1)); check(variable_a) + const auto& add = std::dynamic_pointer_cast(node); check(add) + + size_t matmul2_idx = ov::is_type(add->get_input_node_shared_ptr(0)) ? 0 : 1; + const auto& matmul2 = std::dynamic_pointer_cast(add->get_input_node_shared_ptr(matmul2_idx)); check(matmul2) + + const auto& multiply = std::dynamic_pointer_cast(matmul2->get_input_node_shared_ptr(0)); check(multiply) + + const auto& variable_b = std::dynamic_pointer_cast(matmul2->get_input_node_shared_ptr(1)); check(variable_b) + + size_t matmul1_idx = ov::is_type(multiply->get_input_node_shared_ptr(0)) ? 0 : 1; + const auto& matmul1 = std::dynamic_pointer_cast(multiply->get_input_node_shared_ptr(matmul1_idx)); check(matmul1) + + size_t alpha_idx = (matmul1_idx + 1) % 2; + const auto& variable_alpha = + std::dynamic_pointer_cast(multiply->get_input_node_shared_ptr(alpha_idx)); check(variable_alpha) + + const auto& variable_a = std::dynamic_pointer_cast(matmul1->get_input_node_shared_ptr(1)); check(variable_a) #undef check return true; @@ -68,17 +76,19 @@ LoRAHorizontalFusion::LoRAHorizontalFusion() { for (const auto& add : split->get_users()) { add_nodes.emplace_back(add); - bool first_input_matmul = std::dynamic_pointer_cast(add->get_input_node_shared_ptr(0)) != nullptr; - matmul2_nodes.emplace_back(first_input_matmul ? add->get_input_node_shared_ptr(0) - : add->get_input_node_shared_ptr(1)); + size_t matmul2_idx = ov::is_type(add->get_input_node_shared_ptr(0)) ? 0 : 1; + matmul2_nodes.emplace_back(add->get_input_node_shared_ptr(matmul2_idx)); } for (const auto& matmul2 : matmul2_nodes) { multiply_nodes.emplace_back(matmul2->get_input_node_shared_ptr(0)); variable_b_nodes.emplace_back(matmul2->get_input_node_shared_ptr(1)); } for (const auto& multiply : multiply_nodes) { - matmul1_nodes.emplace_back(multiply->get_input_node_shared_ptr(0)); - variable_alpha_nodes.emplace_back(multiply->get_input_node_shared_ptr(1)); + size_t matmul1_idx = ov::is_type(multiply->get_input_node_shared_ptr(0)) ? 0 : 1; + matmul1_nodes.emplace_back(multiply->get_input_node_shared_ptr(matmul1_idx)); + + size_t alpha_idx = (matmul1_idx + 1) % 2; + variable_alpha_nodes.emplace_back(multiply->get_input_node_shared_ptr(alpha_idx)); } for (const auto& matmul1 : matmul1_nodes) { variable_a_nodes.emplace_back(matmul1->get_input_node_shared_ptr(1)); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.hpp index 424f5a58bffb6b..631028d68baa7a 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/lora_horizontal_fusion.hpp @@ -9,6 +9,75 @@ namespace ov { namespace intel_gpu { +// Before: +// ┌─────────┐ ┌─────────┐ +// │ReadValue│ │ReadValue│ +// └────┬────┘ └────┬────┘ +// │ ┌───────────┐ │ +// │ ┌───────────────────────┼ LoraInput ┼───────────────────┐ │ +// │ │ └─────┬─────┘ │ │ +// │ ┌────▼───┐ │ ┌────▼───┐ │ +// └────► Gemm │ │ │ Gemm ◄──────┘ +// ┌─────────┐ └────┬───┘ │ └────┬───┘ ┌─────────┐ +// │ReadValue│ │ │ │ │ReadValue│ +// └────┬────┘ │ ┌───────────▼────────────┐ │ └────┬────┘ +// │ ┌────▼───┐ │FullyConnectedCompressed│ ┌────▼───┐ │ +// └─────────────►Multiply│ └───────────┬────────────┘ │Multiply◄────────────┘ +// └────┬───┘ │ └────────┘ +// ┌─────────┐ │ │ │ ┌─────────┐ +// │ReadValue│ │ │ │ │ReadValue│ +// └────┬────┘ │ │ │ └────┬────┘ +// │ ┌────▼───┐ ┌──────▼──────┐ ┌────▼───┐ │ +// └─────────────► Gemm │ ┌───────────┼VariadicSplit┼──────────┐ │ Gemm ◄────────────────┘ +// └────┬───┘ │ └──────┬──────┘ │ └────┬───┘ +// │ │ │ │ │ +// │ │ │ │ │ +// │ │ │ │ │ +// │ ┌──▼──┐ ▼ ┌──▼──┐ │ +// └───────► Add │ ... │ Add ◄────┘ +// └─────┘ └─────┘ +// After: +// ┌─────────┐ +// ┌────┼ReadValue│ +// ┌──────────┐ ┌──────┐ │ └─────────┘ +// │LoRA_Input┼────────────────────────────┐ ┌─────────────┼Concat◄─────┤ ... +// └────┬─────┘ │ │ └──────┘ │ ┌─────────┐ +// │ │ │ └────┼ReadValue│ +// │ │ │ └─────────┘ +// │ ┌────▼──▼───┐ +// │ │MatMulFused│ +// │ └───────────┘ +// │ │ ┌─────────┐ +// │ │ ┌────┼ReadValue│ +// │ │ ┌──────┐ │ └─────────┘ +// │ │ ┌────────┼Concat◄─────┤ ... +// │ │ │ └──────┘ │ ┌─────────┐ +// │ │ │ └────┼ReadValue│ +// ┌───────────▼────────────┐ ┌───▼──────▼──┐ └─────────┘ +// │FullyConnectedCompressed│ │MultiplyFused│ +// └───────────┬────────────┘ └──────┬──────┘ +// │ │ +// │ ┌─────────┐ │ ┌─────────┐ +// │ │ReadValue│ ┌──▼──┐ │ReadValue│ +// │ └────┬────┘ │Split│ └────┬────┘ +// │ │ └──┬──┘ │ +// │ │ │ │ +// │ │ ┌────────┼────────┐ │ +// │ │ │ │ │ +// │ ┌──▼──▼──┐ ┌──▼──▼──┐ +// │ │ MatMul │ ... │ MatMul │ +// │ └────┬───┘ └────┬───┘ +// │ └──────┐ ┌────────┘ +// │ │ │ +// │ ┌─────┐ ┌─▼────▼─┐ +// └─────────────► Add ◄─────────────┼ Concat │ +// └──┬──┘ └────────┘ +// │ +// │ +// ┌──────▼──────┐ +// │VariadicSplit│ +// └─────────────┘ + class LoRAHorizontalFusion: public ov::pass::MatcherPass { public: OPENVINO_RTTI("LoRAHorizontalFusion", "0"); diff --git a/src/plugins/intel_gpu/tests/unit/transformations/lora_horizontal_fusion.cpp b/src/plugins/intel_gpu/tests/unit/transformations/lora_horizontal_fusion.cpp index 1692ba3a40a1e8..38d2365c9e0545 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/lora_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/lora_horizontal_fusion.cpp @@ -59,7 +59,7 @@ TEST_F(TransformationTestsF, LoRAHorizontalFusion_default) { auto matmul2_1 = std::make_shared(multiply_1, read_value_b_1, false, true); auto add_1 = std::make_shared(split->output(1), matmul2_1); - auto variable_a_2 = std::make_shared( + auto variable_a_2 = std::make_shared( ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_2"}); auto variable_alpha_2 = std::make_shared( ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_2"}); @@ -165,7 +165,7 @@ TEST_F(TransformationTestsF, LoRAHorizontalFusion_default) { } } -TEST_F(TransformationTestsF, LoRAHorizontalFusion_swap_add_inputs) { +TEST_F(TransformationTestsF, LoRAHorizontalFusion_swap_add_and_multiply_inputs) { ov::element::Type model_dt = ov::element::f16; { auto lora_input = std::make_shared(model_dt, ov::PartialShape{-1, -1, 2048}); @@ -188,7 +188,7 @@ TEST_F(TransformationTestsF, LoRAHorizontalFusion_swap_add_inputs) { auto read_value_alpha_0 = std::make_shared(variable_alpha_0); auto read_value_b_0 = std::make_shared(variable_b_0); auto matmul1_0 = std::make_shared(lora_input, read_value_a_0, false, true); - auto multiply_0 = std::make_shared(matmul1_0, read_value_alpha_0); + auto multiply_0 = std::make_shared(read_value_alpha_0, matmul1_0); auto matmul2_0 = std::make_shared(multiply_0, read_value_b_0, false, true); auto add_0 = std::make_shared(matmul2_0, split->output(0)); @@ -202,11 +202,11 @@ TEST_F(TransformationTestsF, LoRAHorizontalFusion_swap_add_inputs) { auto read_value_alpha_1 = std::make_shared(variable_alpha_1); auto read_value_b_1 = std::make_shared(variable_b_1); auto matmul1_1 = std::make_shared(lora_input, read_value_a_1, false, true); - auto multiply_1 = std::make_shared(matmul1_1, read_value_alpha_1); + auto multiply_1 = std::make_shared(read_value_alpha_1, matmul1_1); auto matmul2_1 = std::make_shared(multiply_1, read_value_b_1, false, true); auto add_1 = std::make_shared(matmul2_1, split->output(1)); - auto variable_a_2 = std::make_shared( + auto variable_a_2 = std::make_shared( ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_2"}); auto variable_alpha_2 = std::make_shared( ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_2"}); @@ -216,7 +216,7 @@ TEST_F(TransformationTestsF, LoRAHorizontalFusion_swap_add_inputs) { auto read_value_alpha_2 = std::make_shared(variable_alpha_2); auto read_value_b_2 = std::make_shared(variable_b_2); auto matmul1_2 = std::make_shared(lora_input, read_value_a_2, false, true); - auto multiply_2 = std::make_shared(matmul1_2, read_value_alpha_2); + auto multiply_2 = std::make_shared(read_value_alpha_2, matmul1_2); auto matmul2_2 = std::make_shared(multiply_2, read_value_b_2, false, true); auto add_2 = std::make_shared(matmul2_2, split->output(2)); @@ -312,6 +312,258 @@ TEST_F(TransformationTestsF, LoRAHorizontalFusion_swap_add_inputs) { } } +TEST_F(TransformationTestsF, LoRAHorizontalFusion_split_two_outputs) { + ov::element::Type model_dt = ov::element::f16; + { + auto lora_input = std::make_shared(model_dt, ov::PartialShape{-1, -1, 2048}); + auto weights = std::make_shared(ov::element::u8, ov::Shape{2304, 2048}); + auto bias = std::make_shared(); + auto scale = std::make_shared(model_dt, ov::Shape{2304, 1}); + auto fc_fused = std::make_shared(lora_input, weights, bias, scale); + + auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {2}); + auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {2048, 256}); + auto split = std::make_shared(fc_fused, axis_const, split_const); + + auto variable_a_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_0"}); + auto variable_alpha_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_0"}); + auto variable_b_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({2048, -1}), model_dt, "var_b_0"}); + auto read_value_a_0 = std::make_shared(variable_a_0); + auto read_value_alpha_0 = std::make_shared(variable_alpha_0); + auto read_value_b_0 = std::make_shared(variable_b_0); + auto matmul1_0 = std::make_shared(lora_input, read_value_a_0, false, true); + auto multiply_0 = std::make_shared(matmul1_0, read_value_alpha_0); + auto matmul2_0 = std::make_shared(multiply_0, read_value_b_0, false, true); + auto add_0 = std::make_shared(split->output(0), matmul2_0); + + auto variable_a_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_1"}); + auto variable_alpha_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_1"}); + auto variable_b_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({256, -1}), model_dt, "var_b_1"}); + auto read_value_a_1 = std::make_shared(variable_a_1); + auto read_value_alpha_1 = std::make_shared(variable_alpha_1); + auto read_value_b_1 = std::make_shared(variable_b_1); + auto matmul1_1 = std::make_shared(lora_input, read_value_a_1, false, true); + auto multiply_1 = std::make_shared(matmul1_1, read_value_alpha_1); + auto matmul2_1 = std::make_shared(multiply_1, read_value_b_1, false, true); + auto add_1 = std::make_shared(split->output(1), matmul2_1); + + auto reshape_pattern0 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 32, 64}); + auto reshape_pattern1 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 4, 64}); + auto reshape0 = std::make_shared(add_0, reshape_pattern0, true); + auto reshape1 = std::make_shared(add_1, reshape_pattern1, true); + + auto result0 = std::make_shared(reshape0); + auto result1 = std::make_shared(reshape1); + + model = std::make_shared(ov::NodeVector{result0, result1}, ov::ParameterVector{lora_input}); + manager.register_pass(); + } + + { + auto lora_input = std::make_shared(model_dt, ov::PartialShape{-1, -1, 2048}); + auto weights = std::make_shared(ov::element::u8, ov::Shape{2304, 2048}); + auto bias = std::make_shared(); + auto scale = std::make_shared(model_dt, ov::Shape{2304, 1}); + auto fc_fused = std::make_shared(lora_input, weights, bias, scale); + + auto variable_a_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_0"}); + auto variable_a_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_1"}); + + auto read_value_a_0 = std::make_shared(variable_a_0); + auto read_value_a_1 = std::make_shared(variable_a_1); + auto concat_variable_a = std::make_shared(NodeVector{read_value_a_0, read_value_a_1}, 0); + + auto fused_matmul1 = std::make_shared(lora_input, concat_variable_a, false, true); + + auto variable_alpha_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_0"}); + auto variable_alpha_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_1"}); + + auto read_value_alpha_0 = std::make_shared(variable_alpha_0); + auto read_value_alpha_1 = std::make_shared(variable_alpha_1); + auto concat_variable_alpha = std::make_shared(NodeVector{read_value_alpha_0, read_value_alpha_1}, 1); + + auto multiply = std::make_shared(fused_matmul1, concat_variable_alpha); + + auto split_axis = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {2}); + auto split = std::make_shared(multiply, split_axis, 2); + + auto variable_b_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({2048, -1}), model_dt, "var_b_0"}); + auto variable_b_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({256, -1}), model_dt, "var_b_1"}); + + auto read_value_b_0 = std::make_shared(variable_b_0); + auto read_value_b_1 = std::make_shared(variable_b_1); + + auto matmul2_0 = std::make_shared(split->output(0), read_value_b_0, false, true); + auto matmul2_1 = std::make_shared(split->output(1), read_value_b_1, false, true); + + auto concat_matmul2 = std::make_shared(NodeVector{matmul2_0, matmul2_1}, 2); + + auto add = std::make_shared(fc_fused, concat_matmul2); + + auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {2}); + auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {2048, 256}); + auto var_split = std::make_shared(add, axis_const, split_const); + + auto reshape_pattern0 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 32, 64}); + auto reshape_pattern1 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 4, 64}); + auto reshape0 = std::make_shared(var_split->output(0), reshape_pattern0, true); + auto reshape1 = std::make_shared(var_split->output(1), reshape_pattern1, true); + + auto result0 = std::make_shared(reshape0); + auto result1 = std::make_shared(reshape1); + + model_ref = std::make_shared(ov::NodeVector{result0, result1}, ov::ParameterVector{lora_input}); + comparator.enable(FunctionsComparator::ATTRIBUTES); + } +} + +TEST_F(TransformationTestsF, LoRAHorizontalFusion_multiple_split_output_users) { + ov::element::Type model_dt = ov::element::f16; + { + auto lora_input = std::make_shared(model_dt, ov::PartialShape{-1, -1, 2048}); + auto weights = std::make_shared(ov::element::u8, ov::Shape{2304, 2048}); + auto bias = std::make_shared(); + auto scale = std::make_shared(model_dt, ov::Shape{2304, 1}); + auto fc_fused = std::make_shared(lora_input, weights, bias, scale); + + auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {2}); + auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {2048, 256}); + auto split = std::make_shared(fc_fused, axis_const, split_const); + + auto variable_a_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_0"}); + auto variable_alpha_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_0"}); + auto variable_b_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({2048, -1}), model_dt, "var_b_0"}); + auto read_value_a_0 = std::make_shared(variable_a_0); + auto read_value_alpha_0 = std::make_shared(variable_alpha_0); + auto read_value_b_0 = std::make_shared(variable_b_0); + auto matmul1_0 = std::make_shared(lora_input, read_value_a_0, false, true); + auto multiply_0 = std::make_shared(matmul1_0, read_value_alpha_0); + auto matmul2_0 = std::make_shared(multiply_0, read_value_b_0, false, true); + auto add_0 = std::make_shared(split->output(0), matmul2_0); + + auto variable_a_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_1"}); + auto variable_alpha_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_1"}); + auto variable_b_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({256, -1}), model_dt, "var_b_1"}); + auto read_value_a_1 = std::make_shared(variable_a_1); + auto read_value_alpha_1 = std::make_shared(variable_alpha_1); + auto read_value_b_1 = std::make_shared(variable_b_1); + auto matmul1_1 = std::make_shared(lora_input, read_value_a_1, false, true); + auto multiply_1 = std::make_shared(matmul1_1, read_value_alpha_1); + auto matmul2_1 = std::make_shared(multiply_1, read_value_b_1, false, true); + auto add_1 = std::make_shared(split->output(1), matmul2_1); + + auto reshape_pattern0 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 32, 64}); + auto reshape_pattern1 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 4, 64}); + auto reshape0 = std::make_shared(add_0, reshape_pattern0, true); + auto reshape1 = std::make_shared(add_1, reshape_pattern1, true); + + auto shape_of0 = std::make_shared(add_0); + auto shape_of1 = std::make_shared(add_0); + auto shape_of2 = std::make_shared(add_1); + auto shape_of3 = std::make_shared(add_1); + + auto result0 = std::make_shared(reshape0); + auto result1 = std::make_shared(reshape1); + auto result2 = std::make_shared(shape_of0); + auto result3 = std::make_shared(shape_of1); + auto result4 = std::make_shared(shape_of2); + auto result5 = std::make_shared(shape_of3); + + model = std::make_shared(ov::NodeVector{result0, result1, result2, result3, result4, result5}, ov::ParameterVector{lora_input}); + manager.register_pass(); + } + + { + auto lora_input = std::make_shared(model_dt, ov::PartialShape{-1, -1, 2048}); + auto weights = std::make_shared(ov::element::u8, ov::Shape{2304, 2048}); + auto bias = std::make_shared(); + auto scale = std::make_shared(model_dt, ov::Shape{2304, 1}); + auto fc_fused = std::make_shared(lora_input, weights, bias, scale); + + auto variable_a_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_0"}); + auto variable_a_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, 2048}), model_dt, "var_a_1"}); + + auto read_value_a_0 = std::make_shared(variable_a_0); + auto read_value_a_1 = std::make_shared(variable_a_1); + auto concat_variable_a = std::make_shared(NodeVector{read_value_a_0, read_value_a_1}, 0); + + auto fused_matmul1 = std::make_shared(lora_input, concat_variable_a, false, true); + + auto variable_alpha_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_0"}); + auto variable_alpha_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), model_dt, "var_alpha_1"}); + + auto read_value_alpha_0 = std::make_shared(variable_alpha_0); + auto read_value_alpha_1 = std::make_shared(variable_alpha_1); + auto concat_variable_alpha = std::make_shared(NodeVector{read_value_alpha_0, read_value_alpha_1}, 1); + + auto multiply = std::make_shared(fused_matmul1, concat_variable_alpha); + + auto split_axis = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {2}); + auto split = std::make_shared(multiply, split_axis, 2); + + auto variable_b_0 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({2048, -1}), model_dt, "var_b_0"}); + auto variable_b_1 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({256, -1}), model_dt, "var_b_1"}); + + auto read_value_b_0 = std::make_shared(variable_b_0); + auto read_value_b_1 = std::make_shared(variable_b_1); + + auto matmul2_0 = std::make_shared(split->output(0), read_value_b_0, false, true); + auto matmul2_1 = std::make_shared(split->output(1), read_value_b_1, false, true); + + auto concat_matmul2 = std::make_shared(NodeVector{matmul2_0, matmul2_1}, 2); + + auto add = std::make_shared(fc_fused, concat_matmul2); + + auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {2}); + auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {2048, 256}); + auto var_split = std::make_shared(add, axis_const, split_const); + + auto reshape_pattern0 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 32, 64}); + auto reshape_pattern1 = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{0, 0, 4, 64}); + auto reshape0 = std::make_shared(var_split->output(0), reshape_pattern0, true); + auto reshape1 = std::make_shared(var_split->output(1), reshape_pattern1, true); + + auto shape_of0 = std::make_shared(var_split->output(0)); + auto shape_of1 = std::make_shared(var_split->output(0)); + auto shape_of2 = std::make_shared(var_split->output(1)); + auto shape_of3 = std::make_shared(var_split->output(1)); + + auto result0 = std::make_shared(reshape0); + auto result1 = std::make_shared(reshape1); + auto result2 = std::make_shared(shape_of0); + auto result3 = std::make_shared(shape_of1); + auto result4 = std::make_shared(shape_of2); + auto result5 = std::make_shared(shape_of3); + + model_ref = std::make_shared(ov::NodeVector{result0, result1, result2, result3, result4, result5}, ov::ParameterVector{lora_input}); + comparator.enable(FunctionsComparator::ATTRIBUTES); + } +} + } // namespace intel_gpu } // namespace test } // namespace ov