Skip to content

Commit

Permalink
Add decomposing Reduce for Bugfix of byx reduction (#14449)
Browse files Browse the repository at this point in the history
+ Add DecomposeReduceForFalseKeepdims transformation to fix an accuracy issue in oneDNN reduction
+ Add a Reshape to modify the output of Reduce and update keep_dims to true : reduce-reshape
+ Add exception logic for reduce modes not supported by the byx conversion

Signed-off-by: Min, Byungil <[email protected]>
  • Loading branch information
byungilm authored Jan 5, 2023
1 parent 9427623 commit 0d261db
Show file tree
Hide file tree
Showing 5 changed files with 374 additions and 5 deletions.
16 changes: 11 additions & 5 deletions src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,20 @@ static bool is_reduce_blocked_axes(reduce_node const& node) {
auto num_spatial = format::spatial_num(node.get_output_layout().format);
auto dims = node.get_output_layout().format.dimension();

// Check if it reduces all spatial axes
bool feature_axis_is_only_remaining = true;
for (size_t idx_spatial = (dims - num_spatial); idx_spatial < dims; idx_spatial++) {
if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0) {
feature_axis_is_only_remaining = false;
break;
}
}

if (input_layout.is_static() &&
(count(reduce_axes.begin(), reduce_axes.end(), 1) > 0 ||
(count(reduce_axes.begin(), reduce_axes.end(), 0) > 0 && input_layout.batch() > 1))) {
for (size_t idx_spatial = dims - num_spatial ; idx_spatial < dims ; idx_spatial++) {
if (count(reduce_axes.begin(), reduce_axes.end(), idx_spatial) == 0)
return true;
}
(count(reduce_axes.begin(), reduce_axes.end(), 0) > 0))) {
if (!feature_axis_is_only_remaining)
return true;
}

return false;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "decompose_reduce_for_false_keepdims.hpp"

#include <algorithm>
#include <cassert>
#include <memory>
#include <ngraph/opsets/opset10.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <vector>

namespace ov {
namespace intel_gpu {

// Registers a matcher that rewrites Reduce(keep_dims=false) which reduces the batch
// and all spatial axes (leaving only the feature axis) into the equivalent
// Reduce(keep_dims=true) followed by a Reshape to the original 1-D output shape.
DecomposeReduceForFalseKeepDims::DecomposeReduceForFalseKeepDims() {
    // Get one MatcherPass for all modes: match any arithmetic Reduce op with a
    // static input shape, a constant axes input, and a static output shape.
    auto reduce_pattern = ngraph::pattern::wrap_type<ngraph::opset10::ReduceSum,
                                                     ngraph::opset10::ReduceMean,
                                                     ngraph::opset10::ReduceProd,
                                                     ngraph::opset10::ReduceMin,
                                                     ngraph::opset10::ReduceMax>(
        {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
         ngraph::pattern::wrap_type<ngraph::opset10::Constant>()},
        ngraph::pattern::has_static_shape());

    // register callback
    ov::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        const auto& pattern_map = m.get_pattern_value_map();
        auto reduce =
            as_type_ptr<op::util::ArithmeticReductionKeepDims>(pattern_map.at(reduce_pattern).get_node_shared_ptr());
        if (!reduce)
            return false;

        auto input = reduce->input_value(0);
        const auto input_shape = input.get_shape();
        const auto reduce_shape = reduce->output(0).get_shape();
        const auto input_rank = input.get_partial_shape().rank().get_length();

        // Reduction axes, sorted ascending so spatial axes can be checked by index.
        auto axes_vector = reduce->get_reduction_axes().to_vector();
        std::sort(axes_vector.begin(), axes_vector.end());

        // Rewrite only when keep_dims is false, the batch and all spatial axes are
        // reduced while the feature axis is kept (num_spatial == input_rank - 2),
        // and the rank is below 6 (larger ranks are not handled by this pass).
        if (!reduce->get_keep_dims() &&
            need_transformation_for_reordered_axes(axes_vector, input_rank, (input_rank - 2)) &&
            input_shape.size() < 6) {
            ngraph::NodeVector new_ops;

            // Reduce: same op and axes as the original, but with keep_dims = true.
            auto reduce_const =
                ngraph::opset10::Constant::create(ngraph::element::i64, ngraph::Shape{axes_vector.size()}, axes_vector);

            // Add each reduce mode supported by oneDNN
            if (ngraph::is_type<ngraph::opset10::ReduceSum>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceSum>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMean>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMean>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMin>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMin>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceMax>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceMax>(input, reduce_const, true);
            else if (ngraph::is_type<ngraph::opset10::ReduceProd>(reduce))
                input = std::make_shared<ngraph::opset10::ReduceProd>(input, reduce_const, true);
            else
                // Unsupported reduce mode: leave the graph untouched.
                return false;

            input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name());
            new_ops.push_back(input.get_node_shared_ptr());

            // Reshape: squeeze the keep_dims=true output back to the original
            // keep_dims=false output shape.
            auto reshape_shape = ngraph::Shape((input_rank - axes_vector.size()), 1);
            // Expected that the feature axis is the only un-reduced axis unless a new
            // case for this decomposition is added, so the output is 1-D.
            assert(reshape_shape.size() == 1);
            reshape_shape[0] = reduce_shape[0];  // the remaining (feature) dimension
            input = std::make_shared<ngraph::opset10::Reshape>(
                input,
                ngraph::opset10::Constant::create(ngraph::element::i64,
                                                  ngraph::Shape{reshape_shape.size()},
                                                  reshape_shape),
                false);

            input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "_reshape_false_keepdims");
            new_ops.push_back(input.get_node_shared_ptr());

            // Preserve runtime info and redirect all consumers to the Reshape output.
            ngraph::copy_runtime_info(reduce, new_ops);
            reduce->output(0).replace(input);
            return true;
        }

        return false;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(reduce_pattern, "DecomposeReduceForFalseKeepDims");
    register_matcher(m, callback);
}

// Returns true when the decomposition is required: more than one axis is reduced,
// the batch axis (0) is reduced, the feature axis (1) is kept, and every spatial
// axis is reduced — i.e. the feature axis is the only remaining axis.
bool DecomposeReduceForFalseKeepDims::need_transformation_for_reordered_axes(std::vector<int64_t> reduce_axes,
                                                                             size_t num_dim,
                                                                             size_t num_spatial) {
    const auto begin = reduce_axes.begin();
    const auto end = reduce_axes.end();

    // Case to reduce batch axis and spatial axes: batch must be reduced and
    // feature must be kept, with at least one more reduced axis besides batch.
    const bool reduces_batch = std::find(begin, end, 0) != end;
    const bool keeps_feature = std::find(begin, end, 1) == end;
    if (reduce_axes.size() <= 1 || !reduces_batch || !keeps_feature)
        return false;

    // Check if it reduces all spatial axes; if any spatial axis survives,
    // the feature axis is not the only remaining one.
    for (size_t axis = num_dim - num_spatial; axis < num_dim; ++axis) {
        if (std::find(begin, end, static_cast<int64_t>(axis)) == end)
            return false;
    }

    return true;
}

} // namespace intel_gpu
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>

namespace ov {
namespace intel_gpu {

/**
 * @brief Add Reshape to modify output of Reduce and modify keep_dims to true : reduce-reshape
 * A clDNN Reduce reorders un-reduced axes of its output tensor to b-f and spatial order when keep_dims is false.
 * oneDNN reduction does not allow this. And clDNN execution shows a huge perf drop for blocked formats.
 */
class DecomposeReduceForFalseKeepDims : public ngraph::pass::MatcherPass {
public:
    // Decompose Reduce into Reduce(keep_dims=true) + Reshape when keep_dims is
    // false and it reduces the batch and all spatial axes (feature axis remains).
    DecomposeReduceForFalseKeepDims();

    // Returns true if the reduction axes include the batch axis (with the feature
    // axis kept) and all spatial axes — the case the decomposition must handle.
    bool need_transformation_for_reordered_axes(std::vector<int64_t> reduce_axes, size_t num_dim, size_t num_spatial);
};

} // namespace intel_gpu
} // namespace ov
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include "transformations/einsum_decomposition.hpp"
#include "transformations/convert_pooling_to_reduce.hpp"
#include "transformations/decompose_reduce_for_false_keepdims.hpp"

#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
Expand Down Expand Up @@ -266,6 +267,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
manager.register_pass<ConvertAvgPoolingToReduce>();
manager.register_pass<DecomposeReduceForFalseKeepDims>();
} else {
pass_config->set_callback<ngraph::pass::ConvertReduceSumToPooling>(
[](const_node_ptr &node) -> bool {
Expand Down
Loading

0 comments on commit 0d261db

Please sign in to comment.