[GPU] Enable fc 4d for MatMul (openvinotoolkit#24642)

### Details:
 - *Enable fc 4d for MatMul to calculate 4x2 (4D input x 2D weights)*

### Tickets:
 - *132334*
xufang-lisa · Aug 21, 2024 · 1335be0 · 1335be0
1 parent 711f060
commit 1335be0
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 1 deletion.
diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp
@@ -117,7 +117,7 @@ layout fully_connected_inst::calc_output_layout(fully_connected_node const& node
         feature = std::max({input_layout.spatial(0), input_layout.spatial(1), input_layout.spatial(2)});
     }
 
-    if (desc->input_size > 3) {
+    if (desc->input_size > 4) {
        input_layout.set_partial_shape(reshape_to_2d(input_pshape, feature));
     }
     if (weights_pshape.size() != 2) {
@@ -127,6 +127,8 @@ layout fully_connected_inst::calc_output_layout(fully_connected_node const& node
     auto output_size = tensor(input_layout.batch(), weights_layout.batch(), 1, 1);
     if (desc->input_size == 3) {
         output_size = tensor(input_layout.batch(), input_layout.feature(), 1, weights_layout.batch());
+    } else if (desc->input_size == 4) {
+        output_size = tensor(input_layout.batch(), input_layout.feature(), weights_layout.batch(), input_layout.spatial(1));
     }
     format output_format = get_preferred_format(node, impl_param);
 

diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
@@ -371,6 +371,43 @@ TEST(fully_connected_gpu, no_biases_fc_i32) {
     }
 }
 
+TEST(fully_connected_gpu, no_biases_4d_input) {
+    //  Shape-only check: fully_connected on a 4D input must keep b, f and y and set x to the weights' batch size.
+    //  Input  : 1x256x256x384
+    //  Output : 1x256x256x1536
+    //  Weights: 1536x384x1x1
+    const int32_t input_b = 1, input_f = 256, input_y = 256, input_x = 384,     // input dimensions (b, f, y, x)
+                  weight_b = 1536, weight_f = 384, weight_y = 1, weight_x = 1;  // weights dimensions (b, f, y, x)
+
+    auto& engine = get_test_engine();
+    // Note: the size initializers below list x before y, i.e. { b, f, x, y }.
+    auto input_prim = engine.allocate_memory({ data_types::f32, format::bfyx, { input_b, input_f, input_x, input_y } });
+    auto weights_prim = engine.allocate_memory({ data_types::f32, format::bfyx, { weight_b, weight_f, weight_x, weight_y } });
+
+    std::vector<float> input_data(input_b * input_f * input_y * input_x, 0);
+    std::vector<float> weights_data(weight_b * weight_f * weight_y * weight_x, 0);
+
+    set_values(input_prim, std::move(input_data));
+    set_values(weights_prim, std::move(weights_data));
+    // NOTE(review): for the fc primitive below, "" is presumably the (absent) bias id and 4 / 2 the input / weights ranks - confirm against the fully_connected constructor.
+    auto input = input_layout("input", input_prim->get_layout());
+    auto w_data = data("weights", weights_prim);
+    auto fc = fully_connected("fc_prim", input_info("input"), "weights", "", 4, 2);
+    topology topology;
+    topology.add(input);
+    topology.add(w_data);
+    topology.add(fc);
+
+    network network(engine, topology, get_test_default_config(engine));
+    network.set_input_data("input", input_prim);
+    // Only the output layout is asserted; both buffers are zero-filled and no output values are compared.
+    auto outputs = network.execute();
+    ASSERT_EQ(outputs.begin()->second.get_layout().batch(), input_b);
+    ASSERT_EQ(outputs.begin()->second.get_layout().feature(), input_f);
+    ASSERT_EQ(outputs.begin()->second.get_layout().spatial(1), input_y);
+    ASSERT_EQ(outputs.begin()->second.get_layout().spatial(0), weight_b);
+}
+
 TEST(fully_connected_gpu, xb_f32_batch_1) {
     //  Input  : 3x1
     //  Output : 4x1