Skip to content

Commit

Permalink
[GPU] Enable b_fs_yx_fsv16 format for shape-agnostic quantize and reorder kernels (openvinotoolkit#26025)
Browse files Browse the repository at this point in the history

### Details:
- Currently, b_fs_yx_fsv16 blocked format is enabled for convolution
operations, but the lack of it for reorder and quantize operations
causes runtime static version kernel recompilation. This change enables
support for the b_fs_yx_fsv16 format to allow shape-agnostic kernel
selection.

### Tickets:
 - [CVS-145296](https://jira.devtools.intel.com/browse/CVS-145296)
  • Loading branch information
sshlyapn authored Aug 13, 2024
1 parent d6bb880 commit 42ac61b
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ void compile_graph::run(program& p) {

if (node->is_dynamic() && !is_planar) {
if (!(node->is_type<convolution>() && node->get_output_layout().format == cldnn::format::b_fs_yx_fsv16) &&
!(node->is_type<group_normalization>() && node->get_output_layout().format == cldnn::format::b_fs_yx_fsv16)) {
!(node->is_type<group_normalization>() && node->get_output_layout().format == cldnn::format::b_fs_yx_fsv16) &&
!(node->is_type<reorder>() && node->get_output_layout().format == cldnn::format::b_fs_yx_fsv16) &&
!(node->is_type<quantize>() && node->get_output_layout().format == cldnn::format::b_fs_yx_fsv16)) {
can_select_impl = false;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ attach_quantize_impl::attach_quantize_impl() {
format::bfwzyx,
format::bfuwzyx,
format::bfvuwzyx,
format::b_fs_yx_fsv16,
};

auto keys = implementation_map<quantize>::combine(types, formats);
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ attach_reorder_impl::attach_reorder_impl() {
format::bfyx,
format::bfzyx,
format::bfwzyx,
format::b_fs_yx_fsv16
};
implementation_map<reorder>::add(impl_types::ocl, shape_types::dynamic_shape, reorder_impl::create, types, formats);

Expand Down
103 changes: 103 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,109 @@ TEST(quantize_gpu, dynamic) {
}
}

// Verifies that a quantize primitive with a dynamic input shape can run in the
// blocked b_fs_yx_fsv16 layout using a shape-agnostic (dynamic) OCL impl,
// i.e. without falling back to a static-shape kernel recompilation.
// Pipeline: bfyx input -> reorder(b_fs_yx_fsv16) -> quantize(u8) -> reorder(bfyx).
TEST(quantize_gpu, dynamic_fsv16) {
auto& engine = get_test_engine();

// Static memory for the actual data; shape 1x16x2x2 so that the feature
// dimension exactly fills one fsv16 block (16 features).
auto input = engine.allocate_memory({ { 1, 16, 2, 2 }, data_types::f32, format::bfyx });
// Per-channel quantization ranges: one low/high pair per each of the 16 features.
auto input_low = engine.allocate_memory({ { 1, 16, 1, 1 }, data_types::f32, format::bfyx });
auto input_high = engine.allocate_memory({ { 1, 16, 1, 1 }, data_types::f32, format::bfyx });
// Scalar output range [0, 255] shared by all channels (full u8 range).
auto output_low = engine.allocate_memory({ { 1, 1, 1, 1 }, data_types::f32, format::bfyx });
auto output_high = engine.allocate_memory({ { 1, 1, 1, 1 }, data_types::f32, format::bfyx });

// The network input is declared fully dynamic (rank 4) so that a dynamic
// impl must be selected at build time.
layout in_dyn_layout { ov::PartialShape::dynamic(4), data_types::f32, format::bfyx };

// 16 channels x 4 spatial values each; each 4-line group below is 4 channels.
set_values(input, { -1.0f, 2.1f, 3.0f, 4.0f,
5.0f, 2.0f, 2.0f, 3.0f,
4.0f, 6.0f, 3.0f, 3.0f,
3.0f, 5.0f, 1.0f, 1.0f,

1.0f, 1.0f, 1.0f, 1.0f,
4.0f, 6.0f, 3.0f, 3.0f,
3.0f, 5.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f,

1.0f, 2.0f, 3.0f, 4.0f,
5.0f, 2.0f, 2.0f, 3.0f,
4.0f, 6.0f, 3.0f, 3.0f,
3.0f, 5.0f, 1.0f, 1.0f,

1.0f, 1.0f, 1.0f, 1.0f,
4.0f, 6.0f, 3.0f, 3.0f,
3.0f, 5.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f });

// One input_low/input_high value per channel (16 values each).
set_values(input_low, { 0.0f, 1.0f, 2.0f, 3.0f,
4.0f, 5.0f, 6.0f, 7.0f,
7.0f, 6.0f, 5.0f, 4.0f,
3.0f, 2.0f, 1.0f, 0.0f });
set_values(input_high, { 10.0f, 21.0f, 32.0f, 43.0f,
54.0f, 65.0f, 76.0f, 87.0f,
87.0f, 76.0f, 65.0f, 54.0f,
43.0f, 32.0f, 21.0f, 10.0f });

set_values(output_low, { 0.0f });
set_values(output_high, { 255.0f });

// Expected u8 result per element, laid out in planar bfyx order (the final
// reorder converts back from fsv16). Values follow the FakeQuantize formula
// with 255 levels: round((clamp(x, in_lo, in_hi) - in_lo) / (in_hi - in_lo) * 255),
// e.g. channel 0: round((3.0 - 0) / 10 * 255) = 77.
std::vector<uint8_t> ref_data = {
0, 54, 77, 102,
51, 13, 13, 26,
17, 34, 8, 8,
0, 13, 0, 0,

0, 0, 0, 0,
0, 4, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,

0, 0, 0, 0,
0, 0, 0, 0,
0, 4, 0, 0,
0, 5, 0, 0,

0, 0, 0, 0,
17, 34, 8, 8,
26, 51, 0, 0,
26, 26, 26, 26
};

topology topology;
topology.add(
input_layout("input", in_dyn_layout),
data("input_low", input_low),
data("input_high", input_high),
data("output_low", output_low),
data("output_high", output_high),
// Switch to the blocked layout before quantize so the quantize kernel
// itself runs on b_fs_yx_fsv16 (the configuration under test).
reorder("reorder", input_info("input"), format::b_fs_yx_fsv16, data_types::f32),
quantize("quantize", input_info("reorder"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 255, data_types::u8),
reorder("output_reorder", input_info("quantize"), format::bfyx, data_types::u8)
);

ExecutionConfig config = get_test_default_config(engine);
// Dynamic shapes require the new shape-inference path.
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
network network(engine, topology, config);
network.set_input_data("input", input);

// Core assertion of this test: a dynamic (shape-agnostic) impl must have
// been selected for the quantize node, not a static fallback.
auto inst = network.get_primitive("quantize");
auto impl = inst->get_impl();
ASSERT_TRUE(impl != nullptr);
ASSERT_TRUE(impl->is_dynamic());

auto outputs = network.execute();

auto output = outputs.at("output_reorder").get_memory();
cldnn::mem_lock<uint8_t> output_ptr(output, get_test_stream());

// Check that layout and memory contains logical size of tensor
ASSERT_EQ(output->count(), (size_t)64);
ASSERT_EQ(output->get_layout().count(), (size_t)64);

ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint8_t));

// Tolerance of 1 ULP of u8 allows for rounding-mode differences in the kernel.
for (size_t i = 0; i < ref_data.size(); ++i) {
ASSERT_NEAR(output_ptr[i], ref_data[i], 1) << " index = " << i;
}
}

struct quantize_random_test_params {
data_types input_type;
data_types output_type;
Expand Down
73 changes: 73 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,79 @@ TEST(reorder_gpu_f32, dynamic_bfyx_to_bfzyx) {
}
}

// Checks that a reorder into the blocked b_fs_yx_fsv16 layout with a dynamic
// input shape picks a shape-agnostic (dynamic) impl for both the blocking and
// the un-blocking reorder, and that the data survives the round trip
// bfyx(f16) -> b_fs_yx_fsv16 -> relu -> bfyx(f32) unchanged (modulo relu).
TEST(reorder_gpu_f32, dynamic_bfyx_to_fsv16) {
    auto& engine = get_test_engine();

    const ov::Shape in_shape{ 1, 2, 4, 2 };
    // Network input is rank-4 dynamic; the concrete shape arrives with the data.
    layout in_layout{ ov::PartialShape::dynamic(in_shape.size()), data_types::f16, format::bfyx };
    auto input_mem = engine.allocate_memory({ ov::PartialShape(in_shape), data_types::f16, format::bfyx });

    set_values<ov::float16>(input_mem, {
        ov::float16(1.f), ov::float16(0.f),
        ov::float16(5.f), ov::float16(1.5f),

        ov::float16(2.f), ov::float16(0.f),
        ov::float16(6.f), ov::float16(5.2f),

        ov::float16(3.f), ov::float16(0.5f),
        ov::float16(7.f), ov::float16(12.f),

        ov::float16(4.f), ov::float16(-0.5f),
        ov::float16(8.f), ov::float16(8.f)
    });

    topology topology(
        input_layout("input", in_layout),
        reorder("reorder", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
        activation("relu", input_info("reorder"), activation_func::relu),
        reorder("output_reorder", input_info("relu"), format::bfyx, data_types::f32));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    network network(engine, topology, config);

    // Both reorders must have been assigned dynamic (shape-agnostic) impls.
    auto to_blocked_inst = network.get_primitive("reorder");
    auto to_blocked_impl = to_blocked_inst->get_impl();
    ASSERT_TRUE(to_blocked_impl != nullptr);
    ASSERT_TRUE(to_blocked_impl->is_dynamic());

    auto to_planar_inst = network.get_primitive("output_reorder");
    auto to_planar_impl = to_planar_inst->get_impl();
    ASSERT_TRUE(to_planar_impl != nullptr);
    ASSERT_TRUE(to_planar_impl->is_dynamic());

    network.set_input_data("input", input_mem);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "output_reorder");

    auto output_mem = outputs.begin()->second.get_memory();
    ASSERT_TRUE(output_mem->get_layout().format == format::bfyx);
    auto out_layout = output_mem->get_layout();
    auto expected_shape = ov::PartialShape(in_shape);
    ASSERT_EQ(out_layout.get_partial_shape(), expected_shape);

    // Input values after relu (the single -0.5 becomes 0), in planar bfyx order.
    const float expected[16] = {
        1.f, 0.f,
        5.f, 1.5f,

        2.f, 0.f,
        6.f, 5.2f,

        3.f, 0.5f,
        7.f, 12.f,

        4.f, 0.f,
        8.f, 8.f
    };

    cldnn::mem_lock<float> output_ptr(output_mem, get_test_stream());
    // f16 round trip: compare with a loose 1e-2 tolerance.
    for (int idx = 0; idx < 16; ++idx) {
        ASSERT_NEAR(expected[idx], output_ptr[idx], 1e-2f);
    }
}

TEST(reorder_gpu_f32, basic_yxfb_to_bfzyx)
{
// Input : yxfb:2x2x2x2
Expand Down

0 comments on commit 42ac61b

Please sign in to comment.