Skip to content

Commit

Permalink
enable half in distributed matrix/vector/pgm/gmres
Browse files Browse the repository at this point in the history
  • Loading branch information
yhmtsai committed Jan 2, 2025
1 parent bed8415 commit f50a7d1
Show file tree
Hide file tree
Showing 16 changed files with 229 additions and 257 deletions.
4 changes: 2 additions & 2 deletions core/distributed/vector_cache.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -48,7 +48,7 @@ void VectorCache<ValueType>::init_from(


#define GKO_DECLARE_VECTOR_CACHE(_type) class VectorCache<_type>
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE(GKO_DECLARE_VECTOR_CACHE);
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_VECTOR_CACHE);


} // namespace detail
Expand Down
269 changes: 130 additions & 139 deletions core/multigrid/pgm.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -389,147 +389,138 @@ void Pgm<ValueType, IndexType>::generate()
#if GINKGO_BUILD_MPI
if (std::dynamic_pointer_cast<
const experimental::distributed::DistributedBase>(system_matrix_)) {
if constexpr (std::is_same_v<remove_complex<ValueType>, half>) {
GKO_NOT_SUPPORTED(nullptr);
} else {
// Converts `matrix` into a distributed matrix using the current ValueType
// and IndexType and installs the result as the fine operator.
//
// `matrix` is a (smart) pointer whose pointee type exposes
// `result_type::global_index_type`; that type selects the global index type
// of the matrix created here.
auto convert_fine_op = [&](auto matrix) {
using global_index_type = typename std::decay_t<
decltype(*matrix)>::result_type::global_index_type;
// Reuse the executor and the communicator of the incoming matrix.
auto exec = as<LinOp>(matrix)->get_executor();
auto comm =
as<experimental::distributed::DistributedBase>(matrix)
->get_communicator();
// The two empty Csr objects are presumably the templates for the local
// and non-local parts -- TODO confirm against distributed::Matrix::create.
auto fine = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
matrix::Csr<ValueType, IndexType>::create(exec),
matrix::Csr<ValueType, IndexType>::create(exec)));
// Fill the new matrix from the input and register it as the fine op.
matrix->convert_to(fine);
this->set_fine_op(fine);
};
// Decides whether the fine operator has to be regenerated for `matrix`.
//
// The fine operator is rebuilt through convert_fine_op when sorting is
// requested (`parameters_.skip_sorting` is false) or when either the local
// or the non-local matrix is not of type csr_type. Otherwise this lambda
// does nothing.
auto setup_fine_op = [&](auto matrix) {
// Only csr matrices are supported currently.
// dynamic_pointer_cast yields nullptr when the part is not a csr_type.
auto local_csr = std::dynamic_pointer_cast<const csr_type>(
matrix->get_local_matrix());
auto non_local_csr = std::dynamic_pointer_cast<const csr_type>(
matrix->get_non_local_matrix());
// If the system matrix is not csr or needs sorting, generate the
// csr.
if (!parameters_.skip_sorting || !local_csr || !non_local_csr) {
using global_index_type = typename std::decay_t<
decltype(*matrix)>::global_index_type;
// View the matrix as convertible to a distributed matrix with the same
// value/index types and let convert_fine_op perform the conversion.
convert_fine_op(
as<ConvertibleTo<experimental::distributed::Matrix<
ValueType, IndexType, global_index_type>>>(matrix));
}
};

using fst_mtx_type =
experimental::distributed::Matrix<ValueType, IndexType,
IndexType>;
using snd_mtx_type =
experimental::distributed::Matrix<ValueType, IndexType, int64>;
// setup the fine op using Csr with current ValueType
// we do not use dispatcher run in the first place because we have
// the fallback option for that.
if (auto obj = std::dynamic_pointer_cast<const fst_mtx_type>(
system_matrix_)) {
setup_fine_op(obj);
} else if (auto obj = std::dynamic_pointer_cast<const snd_mtx_type>(
system_matrix_)) {
setup_fine_op(obj);
} else {
// handle other ValueTypes.
run<ConvertibleTo, fst_mtx_type, snd_mtx_type>(system_matrix_,
convert_fine_op);
}

auto distributed_setup = [&](auto matrix) {
auto exec = gko::as<LinOp>(matrix)->get_executor();
auto comm =
gko::as<experimental::distributed::DistributedBase>(matrix)
->get_communicator();
auto num_rank = comm.size();
auto pgm_local_op =
gko::as<const csr_type>(matrix->get_local_matrix());
auto result = this->generate_local(pgm_local_op);

auto non_local_csr =
as<const csr_type>(matrix->get_non_local_matrix());
auto non_local_size = non_local_csr->get_size()[1];
array<IndexType> non_local_agg(exec, non_local_size);
// get agg information (prolong_row_gather row idx)
communicate(matrix, agg_, non_local_agg);
// generate non_local_col_map
non_local_agg.set_executor(exec->get_master());
array<IndexType> non_local_col_map(exec->get_master(),
non_local_size);
// add an additional entry at the tail so that the offsets can be
// handled easily.
array<IndexType> renumber(exec->get_master(),
non_local_size + 1);
auto recv_offsets = matrix->recv_offsets_;
generate_non_local_map(recv_offsets, non_local_agg,
non_local_col_map, renumber);

// get new recv_size and recv_offsets
std::vector<experimental::distributed::comm_index_type>
new_recv_size(num_rank);
std::vector<experimental::distributed::comm_index_type>
new_recv_offsets(num_rank + 1);
array<IndexType> new_recv_gather_idxs(exec->get_master());
compute_communication(recv_offsets, non_local_agg, renumber,
new_recv_size, new_recv_offsets,
new_recv_gather_idxs);

non_local_col_map.set_executor(exec);
IndexType non_local_num_agg = new_recv_gather_idxs.get_size();
// build csr from row and col map
// unlike non-distributed version, generate_coarse uses
// different row and col maps.
auto result_non_local_csr = generate_coarse(
exec, non_local_csr.get(),
static_cast<IndexType>(std::get<1>(result)->get_size()[0]),
agg_, non_local_num_agg, non_local_col_map);
// use local and non-local to build coarse matrix
// also restriction and prolongation (Local-only-global matrix)
auto coarse_size =
static_cast<int64>(std::get<1>(result)->get_size()[0]);
comm.all_reduce(exec->get_master(), &coarse_size, 1, MPI_SUM);
new_recv_gather_idxs.set_executor(exec);

// setup the generated linop.
// Converts `matrix` into a distributed matrix using the current ValueType
// and IndexType and installs the result as the fine operator.
//
// `matrix` is a (smart) pointer whose pointee type exposes
// `result_type::global_index_type`; that type selects the global index type
// of the matrix created here.
auto convert_fine_op = [&](auto matrix) {
using global_index_type = typename std::decay_t<
decltype(*matrix)>::result_type::global_index_type;
// Reuse the executor and the communicator of the incoming matrix.
auto exec = as<LinOp>(matrix)->get_executor();
auto comm = as<experimental::distributed::DistributedBase>(matrix)
->get_communicator();
// The two empty Csr objects are presumably the templates for the local
// and non-local parts -- TODO confirm against distributed::Matrix::create.
auto fine = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
matrix::Csr<ValueType, IndexType>::create(exec),
matrix::Csr<ValueType, IndexType>::create(exec)));
// Fill the new matrix from the input and register it as the fine op.
matrix->convert_to(fine);
this->set_fine_op(fine);
};
auto setup_fine_op = [&](auto matrix) {
// Only support csr matrix currently.
auto local_csr = std::dynamic_pointer_cast<const csr_type>(
matrix->get_local_matrix());
auto non_local_csr = std::dynamic_pointer_cast<const csr_type>(
matrix->get_non_local_matrix());
// If the system matrix is not csr or needs sorting, generate the
// csr.
if (!parameters_.skip_sorting || !local_csr || !non_local_csr) {
using global_index_type =
typename std::decay_t<decltype(*matrix)>::global_index_type;
auto coarse = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm, gko::dim<2>(coarse_size, coarse_size),
std::get<1>(result), result_non_local_csr,
new_recv_size, new_recv_offsets,
new_recv_gather_idxs));
auto restrict_op = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
dim<2>(coarse_size,
gko::as<LinOp>(matrix)->get_size()[0]),
std::get<2>(result)));
auto prolong_op = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
dim<2>(gko::as<LinOp>(matrix)->get_size()[0],
coarse_size),
std::get<0>(result)));
this->set_multigrid_level(prolong_op, coarse, restrict_op);
};

// the fine op is using csr with the current ValueType
run<fst_mtx_type, snd_mtx_type>(this->get_fine_op(),
distributed_setup);
convert_fine_op(
as<ConvertibleTo<experimental::distributed::Matrix<
ValueType, IndexType, global_index_type>>>(matrix));
}
};

using fst_mtx_type =
experimental::distributed::Matrix<ValueType, IndexType, IndexType>;
using snd_mtx_type =
experimental::distributed::Matrix<ValueType, IndexType, int64>;
// setup the fine op using Csr with current ValueType
// we do not use dispatcher run in the first place because we have
// the fallback option for that.
if (auto obj =
std::dynamic_pointer_cast<const fst_mtx_type>(system_matrix_)) {
setup_fine_op(obj);
} else if (auto obj = std::dynamic_pointer_cast<const snd_mtx_type>(
system_matrix_)) {
setup_fine_op(obj);
} else {
// handle other ValueTypes.
run<ConvertibleTo, fst_mtx_type, snd_mtx_type>(system_matrix_,
convert_fine_op);
}

// Builds the multigrid level (prolongation, coarse and restriction
// operators) for a distributed fine matrix and registers it through
// set_multigrid_level.
//
// `matrix` is a pointer to a distributed matrix whose local and non-local
// parts must both be csr_type (enforced by the `as<const csr_type>` casts).
// The global index type of the generated operators is taken from the
// pointee type of `matrix`.
auto distributed_setup = [&](auto matrix) {
auto exec = gko::as<LinOp>(matrix)->get_executor();
auto comm =
gko::as<experimental::distributed::DistributedBase>(matrix)
->get_communicator();
auto num_rank = comm.size();
// Run the local generation on the local part. `result` is accessed
// below with std::get<0>/<1>/<2>, which are used as the local pieces of
// the prolongation, coarse and restriction operators respectively.
auto pgm_local_op =
gko::as<const csr_type>(matrix->get_local_matrix());
auto result = this->generate_local(pgm_local_op);

auto non_local_csr =
as<const csr_type>(matrix->get_non_local_matrix());
// One aggregate entry per index of the second dimension of the
// non-local matrix.
auto non_local_size = non_local_csr->get_size()[1];
array<IndexType> non_local_agg(exec, non_local_size);
// get agg information (prolong_row_gather row idx)
communicate(matrix, agg_, non_local_agg);
// generate non_local_col_map
// The map and renumbering arrays live on the master executor, so the
// received aggregate data is moved there first.
non_local_agg.set_executor(exec->get_master());
array<IndexType> non_local_col_map(exec->get_master(),
non_local_size);
// add an additional entry at the tail so that the offsets can be
// handled easily.
array<IndexType> renumber(exec->get_master(), non_local_size + 1);
auto recv_offsets = matrix->recv_offsets_;
generate_non_local_map(recv_offsets, non_local_agg,
non_local_col_map, renumber);

// get new recv_size and recv_offsets
// Sized per rank: num_rank sizes and num_rank + 1 offsets.
std::vector<experimental::distributed::comm_index_type>
new_recv_size(num_rank);
std::vector<experimental::distributed::comm_index_type>
new_recv_offsets(num_rank + 1);
array<IndexType> new_recv_gather_idxs(exec->get_master());
compute_communication(recv_offsets, non_local_agg, renumber,
new_recv_size, new_recv_offsets,
new_recv_gather_idxs);

// Move the column map back to the matrix executor before it is used to
// build the coarse non-local matrix.
non_local_col_map.set_executor(exec);
IndexType non_local_num_agg = new_recv_gather_idxs.get_size();
// build csr from row and col map
// unlike non-distributed version, generate_coarse uses
// different row and col maps.
auto result_non_local_csr = generate_coarse(
exec, non_local_csr.get(),
static_cast<IndexType>(std::get<1>(result)->get_size()[0]),
agg_, non_local_num_agg, non_local_col_map);
// use local and non-local to build coarse matrix
// also restriction and prolongation (Local-only-global matrix)
// The global coarse size is the sum of the local coarse sizes over all
// ranks of the communicator.
auto coarse_size =
static_cast<int64>(std::get<1>(result)->get_size()[0]);
comm.all_reduce(exec->get_master(), &coarse_size, 1, MPI_SUM);
new_recv_gather_idxs.set_executor(exec);

// setup the generated linop.
using global_index_type =
typename std::decay_t<decltype(*matrix)>::global_index_type;
// Coarse operator: local coarse matrix plus the coarse non-local matrix
// and the recomputed receive sizes/offsets/gather indices.
auto coarse = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm, gko::dim<2>(coarse_size, coarse_size),
std::get<1>(result), result_non_local_csr,
new_recv_size, new_recv_offsets, new_recv_gather_idxs));
// Restriction operator: global size coarse_size x fine size, created
// from a single local matrix only.
auto restrict_op = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
dim<2>(coarse_size,
gko::as<LinOp>(matrix)->get_size()[0]),
std::get<2>(result)));
// Prolongation operator: global size fine size x coarse_size, created
// from a single local matrix only.
auto prolong_op = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
dim<2>(gko::as<LinOp>(matrix)->get_size()[0],
coarse_size),
std::get<0>(result)));
this->set_multigrid_level(prolong_op, coarse, restrict_op);
};

// the fine op is using csr with the current ValueType
run<fst_mtx_type, snd_mtx_type>(this->get_fine_op(), distributed_setup);
} else
#endif // GINKGO_BUILD_MPI
{
Expand Down
7 changes: 4 additions & 3 deletions core/solver/gmres.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -196,17 +196,18 @@ void finish_reduce(matrix::Dense<ValueType>* hessenberg_iter,
auto hessenberg_reduce = hessenberg_iter->create_submatrix(
span{0, restart_iter + 1}, span{0, num_rhs});
int message_size = static_cast<int>((restart_iter + 1) * num_rhs);
auto sum_op = gko::experimental::mpi::sum<ValueType>();
if (experimental::mpi::requires_host_buffer(exec, comm)) {
::gko::detail::DenseCache<ValueType> host_reduction_buffer;
host_reduction_buffer.init(exec->get_master(),
hessenberg_reduce->get_size());
host_reduction_buffer->copy_from(hessenberg_reduce);
comm.all_reduce(exec->get_master(), host_reduction_buffer->get_values(),
message_size, MPI_SUM);
message_size, sum_op.get());
hessenberg_reduce->copy_from(host_reduction_buffer.get());
} else {
comm.all_reduce(exec, hessenberg_reduce->get_values(), message_size,
MPI_SUM);
sum_op.get());
}
}
#endif
Expand Down
4 changes: 2 additions & 2 deletions core/test/mpi/distributed/matrix.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -177,7 +177,7 @@ class MatrixBuilder : public ::testing::Test {
gko::experimental::mpi::communicator comm;
};

TYPED_TEST_SUITE(MatrixBuilder, gko::test::ValueLocalGlobalIndexTypesBase,
TYPED_TEST_SUITE(MatrixBuilder, gko::test::ValueLocalGlobalIndexTypes,
TupleTypenameNameGenerator);


Expand Down
4 changes: 2 additions & 2 deletions core/test/mpi/distributed/preconditioner/schwarz.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -65,7 +65,7 @@ class SchwarzFactory : public ::testing::Test {
std::shared_ptr<Mtx> mtx;
};

TYPED_TEST_SUITE(SchwarzFactory, gko::test::ValueLocalGlobalIndexTypesBase,
TYPED_TEST_SUITE(SchwarzFactory, gko::test::ValueLocalGlobalIndexTypes,
TupleTypenameNameGenerator);


Expand Down
Loading

0 comments on commit f50a7d1

Please sign in to comment.