Skip to content

Commit

Permalink
[GPU] Skip reorder opt when its dependency is crop (#27547)
Browse files Browse the repository at this point in the history
### Details:
 - *Skip reorder opt when its dependency is crop*

### Tickets:
 - *155068*
  • Loading branch information
steve-y authored Dec 21, 2024
1 parent 574c1a2 commit 6e02445
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ void remove_redundant_reorders::run(program& p) {
auto o_layout = r_node.get_output_layout();
const auto& i_layout = r_node.get_input_layout(0);

auto is_r_node_rank_changed = r_node.get_output_layout().get_rank() != r_node.get_dependency(0).get_output_layout().get_rank();
if (is_r_node_rank_changed &&
((!update_implementations && r_node.get_dependency(0).is_type<crop>()) ||
(r_node.get_dependency(0).is_type<crop>() && r_node.get_dependency(0).can_be_optimized())))
continue;

// Optimize reorder b_fs_yx_fsv16 -> bfyx when spatials are equal to 1. In this case we can reinterpret buffer,
// but pads need to be handled correctly.
if (i_layout.format == format::b_fs_yx_fsv16 && o_layout.format == format::bfyx && !r_node.is_output() &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,9 @@ TEST(add_required_reorders, skip_adding_reorder_batch_axis_padding) {
crop_prim = network.get_primitive("crop2");
ASSERT_EQ(crop_prim->can_be_optimized(), true);
auto reorder_prim = network.get_primitive("crop1_reorder");
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
ASSERT_EQ(reorder_prim->can_be_optimized(), false);
reorder_prim = network.get_primitive("crop2_reorder");
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
ASSERT_EQ(reorder_prim->can_be_optimized(), false);
auto concate = network.get_primitive("concat");
ASSERT_EQ(concate->can_be_optimized(), false);
}
Original file line number Diff line number Diff line change
Expand Up @@ -1224,7 +1224,7 @@ TEST(prepare_buffer_fusing, test_implicit_crop_and_outerpadding) {
auto reorder_prim = network.get_primitive("gather1_reorder");
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
reorder_prim = network.get_primitive("gather2_reorder");
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
ASSERT_EQ(reorder_prim->can_be_optimized(), false);
auto reshape_prim = network.get_primitive("reshape1");
ASSERT_EQ(reshape_prim->can_be_optimized(), true);
}
Expand Down
93 changes: 93 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2467,6 +2467,99 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded)
}
}

TEST(reorder_gpu_f32, bfzyx_to_bfyx_padded) {
tests::random_generator rg(GET_SUITE_NAME);
auto& engine = get_test_engine();

const int32_t b_in = 1024;
const int32_t f_in = 64;
const int32_t x_in = 72;
const int32_t y_in = 2;
const int32_t z_in = 3;

const int32_t b_crop = 1024;
const int32_t f_crop = 64;
const int32_t x_crop = 72;
const int32_t y_crop = 2;
const int32_t z_crop = 1;

const int32_t z0_off = 0;
const int32_t z1_off = 1;
const int32_t z2_off = 2;

auto input = engine.allocate_memory({ data_types::f32,format::bfzyx,{ b_in, f_in, x_in, y_in, z_in } });

topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(crop("crop0", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z0_off }));
topology.add(crop("crop1", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z1_off }));
topology.add(crop("crop2", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z2_off }));
topology.add(reorder("reorder0", input_info("crop0"), format::bfyx, data_types::f32));
topology.add(reorder("reorder1", input_info("crop1"), format::bfyx, data_types::f32));
topology.add(reorder("reorder2", input_info("crop2"), format::bfyx, data_types::f32));
topology.add(reshape("reshape0", input_info("reorder0"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
topology.add(reshape("reshape1", input_info("reorder1"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
topology.add(reshape("reshape2", input_info("reorder2"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));

std::vector<float> input_vec = rg.generate_random_1d<float>(input->count(), -10, 10);
set_values(input, input_vec);

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

network.set_input_data("input", input);
auto outputs = network.execute();
auto output0 = outputs.at("reshape0").get_memory();
auto output1 = outputs.at("reshape1").get_memory();
auto output2 = outputs.at("reshape2").get_memory();

cldnn::mem_lock<float> output_ptr0(output0, get_test_stream());
for (int b = 0; b < b_crop; ++b) {
for (int f = 0; f < f_crop; ++f) {
for (int z = 0; z < z_crop; ++z) {
for (int y = 0; y < y_crop; ++y) {
for (int x = 0; x < x_crop; ++x) {
int linear_id = x + x_in * (y + y_in * (z + z0_off + z_in * (f + f_in * b)));
int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
ASSERT_EQ(output_ptr0[output_linear_id], input_vec[linear_id]);
}
}
}
}
}

cldnn::mem_lock<float> output_ptr1(output1, get_test_stream());
for (int b = 0; b < b_crop; ++b) {
for (int f = 0; f < f_crop; ++f) {
for (int z = 0; z < z_crop; ++z) {
for (int y = 0; y < y_crop; ++y) {
for (int x = 0; x < x_crop; ++x) {
int linear_id = x + x_in * (y + y_in * (z + z1_off + z_in * (f + f_in * b)));
int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
ASSERT_EQ(output_ptr1[output_linear_id], input_vec[linear_id]);
}
}
}
}
}

cldnn::mem_lock<float> output_ptr2(output2, get_test_stream());
for (int b = 0; b < b_crop; ++b) {
for (int f = 0; f < f_crop; ++f) {
for (int z = 0; z < z_crop; ++z) {
for (int y = 0; y < y_crop; ++y) {
for (int x = 0; x < x_crop; ++x) {
int linear_id = x + x_in * (y + y_in * (z + z2_off + z_in * (f + f_in * b)));
int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
ASSERT_EQ(output_ptr2[output_linear_id], input_vec[linear_id]);
}
}
}
}
}
}

TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed)
{
auto& engine = get_test_engine();
Expand Down

0 comments on commit 6e02445

Please sign in to comment.