Commit a4e6f43: cleanup
Signed-off-by: Vladimir Paramuzov <[email protected]>
vladimir-paramuzov committed Nov 26, 2024
1 parent 102bf3d commit a4e6f43
Showing 25 changed files with 56 additions and 214 deletions.
@@ -138,7 +138,6 @@ class ProgramBuilder final {

void add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim, std::vector<std::string> aliases = {});

bool requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) const;
bool is_inner_program() const { return m_is_inner_program; }
bool is_query_mode() { return queryMode; }

113 changes: 50 additions & 63 deletions src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp
@@ -13,6 +13,53 @@
using namespace cldnn;
using namespace ov::intel_gpu;

namespace {

template<typename T>
padding convert_paddings(const padding& current_pad, const T& pad_begin, const T& pad_end, size_t spatial_rank) {
tensor::value_type pb_z = std::max<std::ptrdiff_t>(pad_begin.size() >= 3 ? pad_begin[pad_begin.size() - 3] : 0, 0);
tensor::value_type pb_y = std::max<std::ptrdiff_t>(pad_begin.size() >= 2 ? pad_begin[pad_begin.size() - 2] : 0, 0);
tensor::value_type pb_x = std::max<std::ptrdiff_t>(pad_begin.size() >= 1 ? pad_begin[pad_begin.size() - 1] : 0, 0);

tensor::value_type pe_z = std::max<std::ptrdiff_t>(pad_end.size() >= 3 ? pad_end[pad_end.size() - 3] : 0, 0);
tensor::value_type pe_y = std::max<std::ptrdiff_t>(pad_end.size() >= 2 ? pad_end[pad_end.size() - 2] : 0, 0);
tensor::value_type pe_x = std::max<std::ptrdiff_t>(pad_end.size() >= 1 ? pad_end[pad_end.size() - 1] : 0, 0);

const auto& lower_sizes = current_pad._lower_size;
const auto& upper_sizes = current_pad._upper_size;

std::vector<int32_t> needed_lpad, needed_upad;
needed_lpad.push_back(lower_sizes[0]);
needed_lpad.push_back(lower_sizes[1]);

needed_upad.push_back(upper_sizes[0]);
needed_upad.push_back(upper_sizes[1]);
if (spatial_rank == 3) {
needed_lpad.push_back(std::max(pb_z, lower_sizes[2]));
needed_lpad.push_back(std::max(pb_y, lower_sizes[3]));
needed_lpad.push_back(std::max(pb_x, lower_sizes[4]));

needed_upad.push_back(std::max(pe_z, upper_sizes[2]));
needed_upad.push_back(std::max(pe_y, upper_sizes[3]));
needed_upad.push_back(std::max(pe_x, upper_sizes[4]));
} else if (spatial_rank == 2) {
needed_lpad.push_back(std::max(pb_y, lower_sizes[2]));
needed_lpad.push_back(std::max(pb_x, lower_sizes[3]));

needed_upad.push_back(std::max(pe_y, upper_sizes[2]));
needed_upad.push_back(std::max(pe_x, upper_sizes[3]));
} else {
needed_lpad.push_back(std::max(pb_x, lower_sizes[2]));
needed_upad.push_back(std::max(pb_x, upper_sizes[2]));
}

padding needed_padding(needed_lpad, needed_upad);

return needed_padding;
}

} // namespace

void prepare_padding::run(program& p) {
if (output_size_handling_enabled) {
// Prepare upper padding for primitives that support output_size parameter.
@@ -66,43 +113,7 @@ void prepare_padding::run(program& p) {
auto padding_begin = prim->padding_begin;
auto padding_end = prim->padding_end;

tensor::value_type pb_z = std::max<std::ptrdiff_t>(padding_begin.size() >= 3 ? padding_begin[padding_begin.size() - 3] : 0, 0);
tensor::value_type pb_y = std::max<std::ptrdiff_t>(padding_begin.size() >= 2 ? padding_begin[padding_begin.size() - 2] : 0, 0);
tensor::value_type pb_x = std::max<std::ptrdiff_t>(padding_begin.size() >= 1 ? padding_begin[padding_begin.size() - 1] : 0, 0);

tensor::value_type pe_z = std::max<std::ptrdiff_t>(padding_end.size() >= 3 ? padding_end[padding_end.size() - 3] : 0, 0);
tensor::value_type pe_y = std::max<std::ptrdiff_t>(padding_end.size() >= 2 ? padding_end[padding_end.size() - 2] : 0, 0);
tensor::value_type pe_x = std::max<std::ptrdiff_t>(padding_end.size() >= 1 ? padding_end[padding_end.size() - 1] : 0, 0);

const auto& lower_sizes = in_layout.data_padding._lower_size;
const auto& upper_sizes = in_layout.data_padding._upper_size;

std::vector<int32_t> needed_lpad, needed_upad;
needed_lpad.push_back(lower_sizes[0]);
needed_lpad.push_back(lower_sizes[1]);

needed_upad.push_back(upper_sizes[0]);
needed_upad.push_back(upper_sizes[1]);
if (spatial_rank == 3) {
needed_lpad.push_back(std::max(pb_z, lower_sizes[2]));
needed_lpad.push_back(std::max(pb_y, lower_sizes[3]));
needed_lpad.push_back(std::max(pb_x, lower_sizes[4]));

needed_upad.push_back(std::max(pe_z, upper_sizes[2]));
needed_upad.push_back(std::max(pe_y, upper_sizes[3]));
needed_upad.push_back(std::max(pe_x, upper_sizes[4]));
} else if (spatial_rank == 2) {
needed_lpad.push_back(std::max(pb_y, lower_sizes[2]));
needed_lpad.push_back(std::max(pb_x, lower_sizes[3]));

needed_upad.push_back(std::max(pe_y, upper_sizes[2]));
needed_upad.push_back(std::max(pe_x, upper_sizes[3]));
} else {
needed_lpad.push_back(std::max(pb_x, lower_sizes[2]));
needed_upad.push_back(std::max(pb_x, upper_sizes[2]));
}

padding needed_padding(needed_lpad, needed_upad);
auto needed_padding = convert_paddings(in_layout.data_padding, padding_begin, padding_end, spatial_rank);

add_required_padding(prim_node, needed_padding);
} else if (node->is_type<deconvolution>()) {
@@ -131,33 +142,9 @@ void prepare_padding::run(program& p) {
auto padding_begin = prim->pads_begin;
auto padding_end = prim->pads_end;

tensor::value_type pb_z = std::max<std::ptrdiff_t>(padding_begin.size() >= 3 ? padding_begin[padding_begin.size() - 3] : 0, 0);
tensor::value_type pb_y = std::max<std::ptrdiff_t>(padding_begin.size() >= 2 ? padding_begin[padding_begin.size() - 2] : 0, 0);
tensor::value_type pb_x = std::max<std::ptrdiff_t>(padding_begin.size() >= 1 ? padding_begin[padding_begin.size() - 1] : 0, 0);

tensor::value_type pe_z = std::max<std::ptrdiff_t>(padding_end.size() >= 3 ? padding_end[padding_end.size() - 3] : 0, 0);
tensor::value_type pe_y = std::max<std::ptrdiff_t>(padding_end.size() >= 2 ? padding_end[padding_end.size() - 2] : 0, 0);
tensor::value_type pe_x = std::max<std::ptrdiff_t>(padding_end.size() >= 1 ? padding_end[padding_end.size() - 1] : 0, 0);

tensor pad_l = tensor(0);
tensor pad_u = tensor(0);
pad_l.spatial[0] = pb_x;
pad_l.spatial[1] = pb_y;
pad_l.spatial[2] = pb_z;

pad_u.spatial[0] = pe_x;
pad_u.spatial[1] = pe_y;
pad_u.spatial[2] = pe_z;

auto in_layout = prim_node.get_input_layout();

const auto& actual_lpad = in_layout.data_padding.lower_size();
const auto& actual_upad = in_layout.data_padding.upper_size();

auto needed_lpad = tensor::max(pad_l, actual_lpad);
auto needed_upad = tensor::max(pad_u, actual_upad);

padding needed_padding(needed_lpad.sizes(), needed_upad.sizes());
const auto spatial_rank = in_layout.get_spatial_rank();
auto needed_padding = convert_paddings(in_layout.data_padding, padding_begin, padding_end, spatial_rank);

add_required_padding(prim_node, needed_padding);
}
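For readers skimming the diff: the two near-identical padding blocks that previously lived inside prepare_padding::run (the removed lines above) collapse into the single convert_paddings helper. Below is a self-contained sketch of the merge rule it implements, using plain standard-library types as stand-ins for cldnn::padding and tensor::value_type; the struct and function names here are illustrative, not plugin API.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for cldnn::padding: per-dim lower/upper pads laid out as
// [batch, feature, spatial... (outermost spatial dim first, x last)].
struct Pad {
    std::vector<int32_t> lower, upper;
};

Pad merge_paddings(const Pad& current,
                   const std::vector<std::ptrdiff_t>& pad_begin,
                   const std::vector<std::ptrdiff_t>& pad_end,
                   size_t spatial_rank) {
    // Read the spatial pad counting from the innermost axis (x=1, y=2, z=3),
    // clamping missing or negative entries to 0 -- same as the pb_*/pe_* lines above.
    auto spatial = [](const std::vector<std::ptrdiff_t>& v, size_t from_back) {
        return static_cast<int32_t>(
            std::max<std::ptrdiff_t>(v.size() >= from_back ? v[v.size() - from_back] : 0, 0));
    };
    // Batch and feature pads carry over untouched; each spatial dim takes the
    // max of the pad the primitive requires and the pad already on the input.
    Pad out{{current.lower[0], current.lower[1]}, {current.upper[0], current.upper[1]}};
    for (size_t d = 0; d < spatial_rank; ++d) {
        size_t from_back = spatial_rank - d;  // outermost spatial dim first
        out.lower.push_back(std::max(spatial(pad_begin, from_back), current.lower[2 + d]));
        out.upper.push_back(std::max(spatial(pad_end,   from_back), current.upper[2 + d]));
    }
    return out;
}
```

One detail worth noting: in the 1D `else` branch of the committed helper, the upper pad is computed as `std::max(pb_x, upper_sizes[2])`, i.e. from pad_begin rather than pad_end. The same expression appears in the pre-existing code being replaced, so the refactor preserves it verbatim; whether it is intentional or a carried-over typo is not clear from the diff. The sketch above uses pad_end for symmetry.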
@@ -117,7 +117,6 @@ kernel_selector::dim_tensor<T> convert_dim_vector(const tensor& t) {
static_cast<T>(sizes[5])};
}


inline kernel_selector::DimTensor<uint32_t> convert_vec_to_dim_tensor(const std::vector<int32_t>& p, size_t out_rank, int32_t default_value) {
auto sizes = p;
auto format = cldnn::format::get_default_format(out_rank);
28 changes: 0 additions & 28 deletions src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp
@@ -65,34 +65,6 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share
}

p.add_primitive(*op, fc);

if (op->get_input_partial_shape(0).size() > 3 && !p.use_new_shape_infer()) {
auto lastLayerName = primitive_name;
auto outReshapeName = primitive_name + "_cldnn_out_reshape";

// add reorder
auto outDims = op->get_output_shape(0);
auto outTensor = tensor_from_dims(outDims);

if (outDims.size() > 4) {
cldnn::format outputFormat = cldnn::format::bfyx;
switch (outDims.size()) {
case 5: outputFormat = cldnn::format::bfzyx; break;
case 6: outputFormat = cldnn::format::bfwzyx; break;
default: break;
}

cldnn::primitive_id reorderId = "reorder:" + outReshapeName + "_reorder";
cldnn::layout outputLayout(cldnn::element_type_to_data_type(op->get_output_element_type(0)), outputFormat, outTensor);
auto reorder_prim = cldnn::reorder(reorderId, cldnn::input_info(primitive_name), outputLayout);
p.add_primitive(*op, reorder_prim);
lastLayerName = reorderId;
}

// add reshape
auto outReshapePrim = cldnn::reshape(outReshapeName, cldnn::input_info(lastLayerName), outTensor);
p.add_primitive(*op, outReshapePrim);
}
}

static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptr<op::FullyConnected>& op) {
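The block deleted here was a legacy-shape-infer workaround: for FC inputs of rank greater than 3, the plugin appended a reorder (for 5D/6D outputs) plus a reshape to restore op->get_output_shape(0). A condensed sketch of the rank-to-layout selection that the removed switch performed, with a plain enum standing in for cldnn::format (illustrative only):

```cpp
#include <cstddef>

// Mirrors the removed switch: rank <= 4 outputs stay in the default 4D
// layout, while 5D/6D outputs were routed through bfzyx/bfwzyx before the
// final reshape back to the original ov::Shape.
enum class Fmt { bfyx, bfzyx, bfwzyx };

Fmt format_for_output_rank(size_t rank) {
    switch (rank) {
        case 5:  return Fmt::bfzyx;
        case 6:  return Fmt::bfwzyx;
        default: return Fmt::bfyx;
    }
}
```

With new shape infer handling arbitrary ranks natively, this reorder/reshape tail presumably becomes dead code, which would explain why it can be dropped outright.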
29 changes: 4 additions & 25 deletions src/plugins/intel_gpu/src/plugin/ops/gather_tree.cpp
@@ -18,32 +18,11 @@ static void CreateGatherTreeOp(ProgramBuilder& p, const std::shared_ptr<ov::op::
auto inputs = p.GetInputInfo(op);
std::string layerName = layer_type_name_ID(op);

std::vector<cldnn::input_info> reordered_inputs;
reordered_inputs.resize(inputs.size());

for (size_t portIndex = 0; portIndex < inputs.size(); portIndex++) {
auto inputDataType = cldnn::element_type_to_data_type(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// GPU primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputs[portIndex].pid + "_" + op->get_friendly_name() + ProgramBuilder::m_preProcessTag;
auto targetFormat = cldnn::format::get_default_format(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputs[portIndex],
targetFormat,
cldnn::data_types::i32);
p.add_primitive(*op, preprocessPrim);
reordered_inputs[portIndex] = cldnn::input_info(reorderPrimName);
} else {
reordered_inputs[portIndex] = inputs[portIndex];
}
}

auto gatherTreePrim = cldnn::gather_tree(layerName,
reordered_inputs[0],
reordered_inputs[1],
reordered_inputs[2],
reordered_inputs[3]);
inputs[0],
inputs[1],
inputs[2],
inputs[3]);

p.add_primitive(*op, gatherTreePrim);
}
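The deleted loop implemented a common plugin pattern: per the removed comment, the GPU primitive had no i64 support, so every i64 input was routed through an i32 reorder before reaching the primitive, and the consumer was rewired to read from the inserted node. A minimal stand-alone sketch of that rewiring; the types and the generated id suffix are illustrative stand-ins, not the real cldnn API:

```cpp
#include <string>
#include <vector>

struct Input {
    std::string pid;   // id of the producing primitive
    bool is_i64;       // element type of this input
};

// For each i64 input, substitute the id of a freshly inserted i64->i32
// conversion node; all other inputs pass through unchanged.
std::vector<Input> insert_i32_reorders(const std::vector<Input>& inputs,
                                       const std::string& op_name) {
    std::vector<Input> out;
    out.reserve(inputs.size());
    for (const auto& in : inputs) {
        if (in.is_i64) {
            out.push_back({in.pid + "_" + op_name + "_reorder_i32", false});
        } else {
            out.push_back(in);
        }
    }
    return out;
}
```

After this change, CreateGatherTreeOp wires the original inputs straight into cldnn::gather_tree; the i64 handling is presumably covered elsewhere (e.g. by a common conversion pass), though the visible diff does not show where.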
25 changes: 0 additions & 25 deletions src/plugins/intel_gpu/src/plugin/ops/gather.cpp
@@ -39,7 +39,6 @@ void CreateGatherOpBase(ProgramBuilder& p, const std::shared_ptr<T>& op, const i
ov::Shape out_shape = is_static ? op->get_output_shape(0) : ov::Shape{};

// Update output_shape in case of scalar indice
bool need_reshape = false;
auto out_shape_original = out_shape;

// WA for NMS->Gather construction. NMS fills part of the output blob by the -1 if these values
@@ -55,9 +54,6 @@

// Set layer name for Gather
auto reshapeName = layerName + "";
if (need_reshape) {
layerName = layerName + "_reshape_output";
}

// Check if Gather could be converted to other primitive
const auto input_shape = op->get_input_partial_shape(0);
@@ -140,27 +136,6 @@ void CreateGatherOpBase(ProgramBuilder& p, const std::shared_ptr<T>& op, const i
p.add_primitive(*op, gatherPrim);
}
}

// Add reorder and reshape for scalar indice
if (need_reshape) {
auto input = inputs[0];
input.pid = layerName;

auto targetFormat = cldnn::format::get_default_format(out_shape_original.size());
if (targetFormat.value != cldnn::format::get_default_format(out_shape.size()).value) {
auto reorderName = layerName + "_cldnn_in_reorder";
auto targetDatatype = cldnn::element_type_to_data_type(op->get_input_element_type(0));
auto reorderPrim = cldnn::reorder(reorderName,
input,
targetFormat,
targetDatatype);
p.add_primitive(*op, reorderPrim);
input.pid = reorderName;
}

auto reshapePrim = cldnn::reshape(reshapeName, input, tensor_from_dims(out_shape_original));
p.add_primitive(*op, reshapePrim);
}
}

static void CreateGatherOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v1::Gather>& op) {
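Context for the deleted need_reshape path: ov Gather with rank-0 (scalar) indices drops the gathered axis from the output, while the legacy path had no 0D tensors, so scalar indices effectively behaved as shape {1} and the plugin compensated with an explicit reshape (and, on a rank-change, a reorder) back to the original ov output shape. That reading of the legacy behavior is an inference from the removed code; the shape arithmetic itself is standard Gather semantics (batch_dims = 0), sketched below:

```cpp
#include <cstddef>
#include <vector>

// Gather output shape for batch_dims == 0: data dims before the axis,
// then all indices dims, then data dims after the axis.
std::vector<size_t> gather_out_shape(const std::vector<size_t>& data,
                                     const std::vector<size_t>& indices,
                                     size_t axis) {
    std::vector<size_t> out(data.begin(), data.begin() + axis);
    out.insert(out.end(), indices.begin(), indices.end());  // empty for scalar indices
    out.insert(out.end(), data.begin() + axis + 1, data.end());
    return out;
}

// gather_out_shape({4, 3, 2}, {}, 1) == {4, 2}: the axis dimension vanishes,
// which is the rank mismatch the removed reorder/reshape pair papered over.
```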
35 changes: 0 additions & 35 deletions src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -329,41 +329,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
m_topology->add_primitive(prim);
}

bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) const {
if (op->is_dynamic()) {
return true;
}

if (ov::is_type<ov::op::v5::Loop>(op)) {
const auto body_function = std::static_pointer_cast<ov::op::v5::Loop>(op)->get_function();
if (body_function->is_dynamic())
return true;
}

if (ov::is_type<ov::op::v5::LSTMSequence>(op) || ov::is_type<ov::op::v4::LSTMCell>(op)) {
return true;
}
// When input node has dynamic shape with 4 dimension, this function return false
// because op.is_dynamic() which only checks input shapes return false.
// So, in the case of input data, we need to check output shape.
for (size_t i = 0; i < op->get_output_size(); i++) {
if (op->get_output_partial_shape(i).is_dynamic())
return true;
}

for (size_t i = 0; i < op->get_output_size(); i++) {
if (op->get_output_partial_shape(i).size() > 6)
return true;
}

for (size_t i = 0; i < op->get_input_size(); i++) {
if (op->get_input_partial_shape(i).size() > 6)
return true;
}

return false;
}

int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr<ov::op::v0::Parameter>& parameter) const {
return m_model->get_parameter_index(parameter);
}
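Since requires_new_shape_infer is deleted outright (declaration in the header at the top of this diff, definition here) with no call sites updated in the visible hunks, the plugin presumably now behaves as if the predicate always returned true. For reference, the removed logic restated as a stand-alone function over an illustrative stand-in struct (the real code queried ov::Node directly):

```cpp
#include <cstddef>
#include <vector>

// Facts about an op that the removed predicate inspected.
struct OpFacts {
    bool any_input_dynamic;         // op->is_dynamic()
    bool loop_with_dynamic_body;    // ov::op::v5::Loop whose body function is dynamic
    bool is_lstm;                   // v5::LSTMSequence or v4::LSTMCell
    std::vector<bool> output_dynamic;
    std::vector<size_t> output_ranks;
    std::vector<size_t> input_ranks;
};

bool requires_new_shape_infer(const OpFacts& op) {
    if (op.any_input_dynamic) return true;
    if (op.loop_with_dynamic_body) return true;
    if (op.is_lstm) return true;
    // is_dynamic() only looks at input shapes, so a node like Parameter with a
    // dynamic shape would slip through; outputs are checked for that reason.
    for (bool dyn : op.output_dynamic)
        if (dyn) return true;
    // Legacy shape infer is limited to tensors of rank 6 and below.
    for (size_t r : op.output_ranks)
        if (r > 6) return true;
    for (size_t r : op.input_ranks)
        if (r > 6) return true;
    return false;
}
```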
@@ -39,7 +39,6 @@ TEST_P(skip_permute_at_runtime_test, runtime_skip) {
reorder("reorder", input_info("permute"), format::get_default_format(rank), data_types::f32));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));

network network(engine, topology, config);
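The one-line change in this and each of the remaining test files is identical: the explicit opt-in to new shape infer disappears. Condensed before/after, excerpted from the test code in this diff, assuming (consistent with the rest of the commit) that the property is no longer needed because the behavior is now unconditional:

```cpp
// Before this commit, tests opted in explicitly:
{
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));  // the removed line
    config.set_property(ov::intel_gpu::optimize_data(true));
}
// After: only the remaining properties are set.
{
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
}
```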
@@ -55,7 +55,6 @@ TEST_P(skip_scatter_update_at_runtime_test, runtime_skip) {
auto update2_zero_layout = layout{ov::PartialShape{0,16}, data_types::f16, format::bfyx};

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));

cldnn::network::ptr network = nullptr;
@@ -58,7 +58,6 @@ TEST(update_shape_test, ocl_impl_in_shapeof_subgraph) {
gather_nonzero("gather_nonzero", input_info("broadcast1"), input_info("count_nonzero")),
broadcast("broadcast2", input_info("gather_nonzero"), input_info("shape_of"), {}, ov::op::BroadcastType::BIDIRECTIONAL));
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

network network(engine, topology, config);
network.set_input_data("input", input_mem);
@@ -624,7 +624,6 @@ TEST_P(fc_fp16_eltwise_prod_unfused_dynamic, basic) {
);

bool is_dynamic = true;
cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
tolerance = 0.5f;
execute(p, false, is_dynamic);
}
@@ -700,7 +699,6 @@ TEST_P(fc_compressed_int8_bias_prod_unfused_dynamic_onednn, basic) {
);

bool is_dynamic = true;
cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
cfg_not_fused.set_property(ov::hint::dynamic_quantization_group_size(0));
tolerance = 1.0f;
execute(p, false, is_dynamic);
@@ -166,7 +166,6 @@ TEST(kernels_cache, reuse_kernels_property) {
reorder("output", input_info("sum"), {{3, 2}, data_types::f16, format::bfyx}));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::hint::enable_kernels_reuse(true));
auto prog = program::build_program(engine, topology, config, false, false);
auto& cache = prog->get_kernels_cache();
@@ -265,7 +265,6 @@ TEST(post_optimize_weights, onednn_group_conv_weights_reorder_test) {

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

if (engine.get_device_info().supports_immad) {
ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::onednn };
@@ -835,7 +835,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_unsqueeze) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

@@ -905,7 +904,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_squeeze_crop_axis) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

@@ -987,7 +985,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

@@ -1069,7 +1066,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_mvn) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

(Diff content for the remaining changed files is not shown.)
