From 0d261dbf8311067f4dbc948588a38ebee8918cbf Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Thu, 5 Jan 2023 15:44:12 +0900 Subject: [PATCH] Add decomposing Reduce for Bugfix of byx reduction (#14449) + Add transformation to fix accuracy issue of oneDNN reduction : DecomposeReduceForFalseKeepdims + Add Reshape to modify output of Reduce and update keep_dims to true : reduce-reshape + Add exception logic for unsupported reduce mode by byx conversion Signed-off-by: Min, Byungil --- .../intel_gpu/src/graph/layout_optimizer.cpp | 16 +- .../decompose_reduce_for_false_keepdims.cpp | 119 ++++++++++ .../decompose_reduce_for_false_keepdims.hpp | 28 +++ .../src/plugin/transformations_pipeline.cpp | 2 + ...compose_reduce_for_false_keepdims_test.cpp | 214 ++++++++++++++++++ 5 files changed, 374 insertions(+), 5 deletions(-) create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.hpp create mode 100644 src/plugins/intel_gpu/tests/transformations/decompose_reduce_for_false_keepdims_test.cpp diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index cadd0f9173956c..06c90ed217d375 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -99,14 +99,20 @@ static bool is_reduce_blocked_axes(reduce_node const& node) { auto num_spatial = format::spatial_num(node.get_output_layout().format); auto dims = node.get_output_layout().format.dimension(); + // Check if it reduces all spatial axes + bool feature_axis_is_only_remaining = true; + for (size_t idx_spatial = (dims - num_spatial); idx_spatial < dims; idx_spatial++) { + if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0) { + feature_axis_is_only_remaining = false; + break; + } + } if (input_layout.is_static() && 
(count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 || - (count(reduce_axes.begin(), reduce_axes.end(), 0) > 0 && input_layout.batch() > 1))) { - for (size_t idx_spatial = dims - num_spatial ; idx_spatial < dims ; idx_spatial++) { - if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0) - return true; - } + (count(reduce_axes.begin(), reduce_axes.end(), 0) > 0))) { + if (!feature_axis_is_only_remaining) + return true; } return false; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.cpp b/src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.cpp new file mode 100644 index 00000000000000..087f083b7c8f1d --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "decompose_reduce_for_false_keepdims.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace ov { +namespace intel_gpu { + +DecomposeReduceForFalseKeepDims::DecomposeReduceForFalseKeepDims() { + // Get one MatcherPass for all modes + auto reduce_pattern = ngraph::pattern::wrap_type( + {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), + ngraph::pattern::wrap_type()}, + ngraph::pattern::has_static_shape()); + + // register callback + ov::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto reduce = + as_type_ptr(pattern_map.at(reduce_pattern).get_node_shared_ptr()); + if (!reduce) + return false; + + auto input = reduce->input_value(0); + const auto input_shape = input.get_shape(); + const auto reduce_shape = reduce->output(0).get_shape(); + const auto input_rank = input.get_partial_shape().rank().get_length(); + + auto axes_vector = reduce->get_reduction_axes().to_vector(); + std::sort(axes_vector.begin(), 
axes_vector.end()); + + if (!reduce->get_keep_dims() && + need_transformation_for_reordered_axes(axes_vector, input_rank, (input_rank - 2)) && + input_shape.size() < 6) { + ngraph::NodeVector new_ops; + + // Reduce + auto reduce_const = + ngraph::opset10::Constant::create(ngraph::element::i64, ngraph::Shape{axes_vector.size()}, axes_vector); + + // Add each reduce mode supported by oneDNN + if (ngraph::is_type(reduce)) + input = std::make_shared(input, reduce_const, true); + else if (ngraph::is_type(reduce)) + input = std::make_shared(input, reduce_const, true); + else if (ngraph::is_type(reduce)) + input = std::make_shared(input, reduce_const, true); + else if (ngraph::is_type(reduce)) + input = std::make_shared(input, reduce_const, true); + else if (ngraph::is_type(reduce)) + input = std::make_shared(input, reduce_const, true); + else + return false; + + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name()); + new_ops.push_back(input.get_node_shared_ptr()); + + // Reshape + auto reshape_shape = ngraph::Shape((input_rank - axes_vector.size()), 1); + // The feature axis is expected to be the only un-reduced axis unless a new case is added to this decomposition. 
+ assert(reshape_shape.size() == 1); + reshape_shape[0] = reduce_shape[0]; + input = std::make_shared( + input, + ngraph::opset10::Constant::create(ngraph::element::i64, + ngraph::Shape{reshape_shape.size()}, + reshape_shape), + false); + + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "_reshape_false_keepdims"); + new_ops.push_back(input.get_node_shared_ptr()); + + ngraph::copy_runtime_info(reduce, new_ops); + reduce->output(0).replace(input); + return true; + } + + return false; + }; + + auto m = std::make_shared(reduce_pattern, "DecomposeReduceForFalseKeepDims"); + register_matcher(m, callback); +} + +bool DecomposeReduceForFalseKeepDims::need_transformation_for_reordered_axes(std::vector reduce_axes, + size_t num_dim, + size_t num_spatial) { + bool feature_axis_is_only_remaining = false; + // Case to reduce batch axis and spatial axes + if (reduce_axes.size() > 1 && count(reduce_axes.begin(), reduce_axes.end(), 0) != 0 && + count(reduce_axes.begin(), reduce_axes.end(), 1) == 0) { + feature_axis_is_only_remaining = true; + // Check if it reduces all spatial axes + for (size_t idx_spatial = (num_dim - num_spatial); idx_spatial < num_dim; idx_spatial++) { + if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0) { + feature_axis_is_only_remaining = false; + break; + } + } + } + + return feature_axis_is_only_remaining; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.hpp b/src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.hpp new file mode 100644 index 00000000000000..3f2f294d2564f7 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/decompose_reduce_for_false_keepdims.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ov { +namespace intel_gpu { + +/** + * @brief 
Set keep_dims of Reduce to true and add a Reshape that restores the keep_dims=false output shape : reduce-reshape + * A clDNN Reduce reorders the un-reduced axes of its output tensor to b-f and spatial order when keep_dims is false. + * oneDNN reduction does not allow this, and clDNN execution shows a huge perf drop for blocked formats. + */ +class DecomposeReduceForFalseKeepDims : public ngraph::pass::MatcherPass { +public: + // Decompose reduce if keep_dims is false and it reduces batch and spatial axes + DecomposeReduceForFalseKeepDims(); + + // Returns true if the reduction axes include the batch axis and all spatial axes but not the feature axis + bool need_transformation_for_reordered_axes(std::vector reduce_axes, size_t num_dim, size_t num_spatial); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 1062aaa168a0da..ac31397dbc94a7 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -28,6 +28,7 @@ #include "transformations/einsum_decomposition.hpp" #include "transformations/convert_pooling_to_reduce.hpp" +#include "transformations/decompose_reduce_for_false_keepdims.hpp" #include #include @@ -266,6 +267,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->disable(); pass_config->disable(); manager.register_pass(); + manager.register_pass(); } else { pass_config->set_callback( [](const_node_ptr &node) -> bool { diff --git a/src/plugins/intel_gpu/tests/transformations/decompose_reduce_for_false_keepdims_test.cpp b/src/plugins/intel_gpu/tests/transformations/decompose_reduce_for_false_keepdims_test.cpp new file mode 100644 index 00000000000000..71c69fb315d287 --- /dev/null +++ b/src/plugins/intel_gpu/tests/transformations/decompose_reduce_for_false_keepdims_test.cpp @@ -0,0 +1,214 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intel_gpu/primitives/reduce.hpp" +#include "ngraph/type/element_type.hpp" +#include "openvino/core/descriptor/tensor.hpp" +#include "test_utils.h" + +using namespace testing; +using namespace ::tests; + +using InputShape = ngraph::PartialShape; +using KeepDims = bool; +using ReduceAxes = std::vector; +using ReduceType = cldnn::reduce_mode; +using ReshapeShape = std::vector; +using NeedDecompose = bool; + +class ReduceDecomposeTests + : public ::testing::Test, + public testing::WithParamInterface< + std::tuple> { +public: + std::shared_ptr fc; + bool need_decompose; + ReshapeShape result_shape; + + void SetUp() override { + const auto& reduce_type = std::get<0>(GetParam()); + const auto& input_shape = std::get<1>(GetParam()); + const auto& axes = std::get<2>(GetParam()); + const auto& keep_dims = std::get<3>(GetParam()); + need_decompose = std::get<4>(GetParam()); + result_shape = std::get<5>(GetParam()); + + fc = get_transformed_function(input_shape, axes, reduce_type, keep_dims); + } + + static std::shared_ptr get_transformed_function(const ngraph::PartialShape& input_shape, + const std::vector& axes, + const ReduceType& reduce_type, + const bool keep_dim) { + auto param = std::make_shared(ngraph::element::f32, input_shape); + if (reduce_type == reduce_mode::logical_or || reduce_type == reduce_mode::logical_and) + param = std::make_shared(ngraph::element::boolean, input_shape); + + ngraph::Output input = param->output(0); + + auto axes_const = ngraph::opset10::Constant::create(ngraph::element::i64, ngraph::Shape{axes.size()}, axes); + + if (reduce_type == reduce_mode::sum) + input = std::make_shared(input, axes_const, keep_dim); + else if (reduce_type == reduce_mode::mean) + input = std::make_shared(input, axes_const, keep_dim); + else if (reduce_type == reduce_mode::min) + input = std::make_shared(input, axes_const, keep_dim); + else 
if (reduce_type == reduce_mode::max) + input = std::make_shared(input, axes_const, keep_dim); + else if (reduce_type == reduce_mode::prod) + input = std::make_shared(input, axes_const, keep_dim); + else if (reduce_type == reduce_mode::logical_or) + input = std::make_shared(input, axes_const, keep_dim); + else if (reduce_type == reduce_mode::logical_and) + input = std::make_shared(input, axes_const, keep_dim); + else + throw std::runtime_error("Invalid reduce type for this test-case."); + + return std::make_shared(ngraph::NodeVector{input.get_node_shared_ptr()}, + ngraph::ParameterVector{param}); + } +}; + +TEST_P(ReduceDecomposeTests, CompareFunctions) { + ngraph::pass::Manager m; + m.set_per_pass_validation(false); + m.register_pass(); + m.register_pass(); + m.run_passes(fc); + + bool success = false; + ov::Shape output_shape; + for (auto& ops : fc->get_ops()) { + std::string type_name(ops->get_type_name()); + + if (type_name.find("Reshape") != std::string::npos) { + success = true; + } + else if (type_name.find("Result") != std::string::npos) { + output_shape = ops->get_shape(); + } + } + ASSERT_TRUE(success == need_decompose); + ASSERT_TRUE(output_shape == result_shape); +} + +INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForFalseKeepdims, + ReduceDecomposeTests, + testing::Values(std::make_tuple(reduce_mode::prod, + InputShape{32, 32, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + true, + ReshapeShape{32}), + std::make_tuple(reduce_mode::sum, + InputShape{16, 3, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + true, + ReshapeShape{3}), + std::make_tuple(reduce_mode::mean, + InputShape{16, 3, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + true, + ReshapeShape{3}), + std::make_tuple(reduce_mode::min, + InputShape{16, 3, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + true, + ReshapeShape{3}), + std::make_tuple(reduce_mode::max, + InputShape{16, 3, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + true, + ReshapeShape{3}), + 
std::make_tuple(reduce_mode::max, + InputShape{8, 3, 64, 64}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + true, + ReshapeShape{3}))); + +INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForFalseKeepdimsNotCase, + ReduceDecomposeTests, + testing::Values(std::make_tuple(reduce_mode::max, + InputShape{32, 32, 32, 32}, + ReduceAxes{0, 2}, + KeepDims{false}, + false, + ReshapeShape{32, 32}), + std::make_tuple(reduce_mode::max, + InputShape{1, 3, 64, 64}, + ReduceAxes{0, 3}, + KeepDims{false}, + false, + ReshapeShape{3, 64}), + std::make_tuple(reduce_mode::max, + InputShape{32, 32, 32, 32}, + ReduceAxes{0}, + KeepDims{false}, + false, + ReshapeShape{32, 32, 32}), + std::make_tuple(reduce_mode::logical_and, + InputShape{16, 3, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + false, + ReshapeShape{3}), + std::make_tuple(reduce_mode::logical_or, + InputShape{16, 3, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{false}, + false, + ReshapeShape{3}), + std::make_tuple(reduce_mode::max, + InputShape{1, 3, 64, 64}, + ReduceAxes{0}, + KeepDims{false}, + false, + ReshapeShape{3, 64, 64}))); + +INSTANTIATE_TEST_SUITE_P(ReduceDecomposeForTrueKeepdims, + ReduceDecomposeTests, + testing::Values(std::make_tuple(reduce_mode::max, + InputShape{32, 32, 32, 32}, + ReduceAxes{0, 2, 3}, + KeepDims{true}, + false, + ReshapeShape{1, 32, 1, 1}), + std::make_tuple(reduce_mode::max, + InputShape{1, 3, 64, 64}, + ReduceAxes{0, 2, 3}, + KeepDims{true}, + false, + ReshapeShape{1, 3, 1, 1}), + std::make_tuple(reduce_mode::max, + InputShape{32, 32, 32, 32}, + ReduceAxes{0, 2}, + KeepDims{true}, + false, + ReshapeShape{1, 32, 1, 32}))); + +TEST(DecomposeReduceForFalseKeepDims, Negative) { + auto f = + ReduceDecomposeTests::get_transformed_function(ngraph::PartialShape::dynamic(), {3}, reduce_mode::max, true); + ngraph::pass::Manager manager; + manager.register_pass(); + ASSERT_NO_THROW(manager.run_passes(f)); +}