From b7e5bc087ca033085bd904afac8cb183031c2eea Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 11 Jan 2025 19:17:50 +0100 Subject: [PATCH 1/6] add factorization validation kernel --- .../factorization/factorization_kernels.cpp | 12 +++- core/device_hooks/common_kernels.inc.cpp | 4 +- core/factorization/factorization_kernels.hpp | 13 +++- .../factorization_kernels.dp.cpp | 12 +++- omp/factorization/factorization_kernels.cpp | 66 ++++++++++++++++++- .../factorization/factorization_kernels.cpp | 61 ++++++++++++++++- reference/test/factorization/lu_kernels.cpp | 63 +++++++++++++++++- 7 files changed, 223 insertions(+), 8 deletions(-) diff --git a/common/cuda_hip/factorization/factorization_kernels.cpp b/common/cuda_hip/factorization/factorization_kernels.cpp index f26ef668d34..758a91bd71d 100644 --- a/common/cuda_hip/factorization/factorization_kernels.cpp +++ b/common/cuda_hip/factorization/factorization_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -487,6 +487,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); +template +void symbolic_validate(std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + bool& valid) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL); + + } // namespace factorization } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 0240dabc7e4..034bc73e388 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -948,6 +948,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL); } // namespace factorization diff --git a/core/factorization/factorization_kernels.hpp b/core/factorization/factorization_kernels.hpp index bab3dd16bd2..fdfca1d2427 100644 --- a/core/factorization/factorization_kernels.hpp +++ b/core/factorization/factorization_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -52,6 +52,13 @@ namespace kernels { matrix::Csr* l_factor, \ bool diag_sqrt) +#define GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL(ValueType, \ + IndexType) \ + void symbolic_validate( \ + std::shared_ptr exec, \ + const matrix::Csr* system_matrix, \ + const matrix::Csr* factors, bool& valid) + #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ @@ -66,7 +73,9 @@ namespace kernels { GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL(ValueType, \ IndexType); \ template \ - GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL(ValueType, IndexType) + GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL(ValueType, IndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(factorization, diff --git a/dpcpp/factorization/factorization_kernels.dp.cpp b/dpcpp/factorization/factorization_kernels.dp.cpp index 070bd8f86ee..18749bbf886 100644 --- a/dpcpp/factorization/factorization_kernels.dp.cpp +++ b/dpcpp/factorization/factorization_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -590,6 +590,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); +template +void symbolic_validate(std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + bool& valid) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL); + + } // namespace factorization } // namespace dpcpp } // namespace kernels diff --git a/omp/factorization/factorization_kernels.cpp b/omp/factorization/factorization_kernels.cpp index e7b66f6f887..a6a59f7dbc3 100644 --- a/omp/factorization/factorization_kernels.cpp +++ b/omp/factorization/factorization_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -10,6 +10,7 @@ #include #include +#include "core/base/allocator.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/csr_builder.hpp" #include "omp/factorization/factorization_helpers.hpp" @@ -291,6 +292,69 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); +template +bool symbolic_validate_impl(std::shared_ptr exec, + const IndexType* row_ptrs, const IndexType* cols, + const IndexType* factor_row_ptrs, + const IndexType* factor_cols, IndexType size) +{ + unordered_set columns(exec); + bool valid = true; +#pragma omp parallel for firstprivate(columns) reduction(&& : valid) + for (IndexType row = 0; row < size; row++) { + const auto in_begin = cols + row_ptrs[row]; + const auto in_end = cols + row_ptrs[row + 1]; + const auto factor_begin = factor_cols + factor_row_ptrs[row]; + const auto factor_end = factor_cols + factor_row_ptrs[row + 1]; + if (!valid) { + continue; + } + columns.clear(); + // the factor needs to contain the original matrix + // plus the diagonal if that was missing + columns.insert(in_begin, in_end); + columns.insert(row); + for (auto col_it = factor_begin; col_it < factor_end; ++col_it) { + const auto col = *col_it; + if (col >= row) { + break; + } + const auto dep_begin = factor_cols + factor_row_ptrs[col]; + const auto dep_end = factor_cols + factor_row_ptrs[col + 1]; + // insert the upper triangular part of the row + const auto dep_diag = std::find(dep_begin, dep_end, col); + columns.insert(dep_diag, dep_end); + } + // the factor should contain exactly these columns, no more + if (factor_end - factor_begin != columns.size()) { + valid = false; + } + for (auto col_it = factor_begin; col_it < factor_end; ++col_it) { + if (columns.find(*col_it) == columns.end()) { + valid = false; + } + } + } + return valid; +} + +template +void symbolic_validate(std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + bool& valid) +{ + valid = symbolic_validate_impl( + exec, system_matrix->get_const_row_ptrs(), + system_matrix->get_const_col_idxs(), factors->get_const_row_ptrs(), + factors->get_const_col_idxs(), + static_cast(system_matrix->get_size()[0])); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL); + + } // namespace factorization } // namespace omp } // namespace kernels diff --git a/reference/factorization/factorization_kernels.cpp b/reference/factorization/factorization_kernels.cpp index 99b522ffba9..6388f62adcc 100644 --- a/reference/factorization/factorization_kernels.cpp +++ b/reference/factorization/factorization_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -10,6 +10,7 @@ #include #include +#include "core/base/allocator.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/csr_builder.hpp" #include "reference/factorization/factorization_helpers.hpp" @@ -231,6 +232,64 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); +template +bool symbolic_validate_impl(std::shared_ptr exec, + const IndexType* row_ptrs, const IndexType* cols, + const IndexType* factor_row_ptrs, + const IndexType* factor_cols, IndexType size) +{ + unordered_set columns(exec); + for (IndexType row = 0; row < size; row++) { + const auto in_begin = cols + row_ptrs[row]; + const auto in_end = cols + row_ptrs[row + 1]; + const auto factor_begin = factor_cols + factor_row_ptrs[row]; + const auto factor_end = factor_cols + factor_row_ptrs[row + 1]; + columns.clear(); + // the factor needs to contain the original matrix + // plus the diagonal if that was missing + columns.insert(in_begin, in_end); + columns.insert(row); + for (auto col_it = factor_begin; col_it < factor_end; ++col_it) { + const auto col = *col_it; + if (col >= row) { + break; + } + const auto dep_begin = factor_cols + factor_row_ptrs[col]; + const auto dep_end = factor_cols + factor_row_ptrs[col + 1]; + // insert the upper triangular part of the row + const auto dep_diag = std::find(dep_begin, dep_end, col); + columns.insert(dep_diag, dep_end); + } + // the factor should contain exactly these columns, no more + if (factor_end - factor_begin != columns.size()) { + return false; + } + for (auto col_it = factor_begin; col_it < factor_end; ++col_it) { + if (columns.find(*col_it) == columns.end()) { + return false; + } + } + } + return true; +} + +template +void symbolic_validate(std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + bool& valid) +{ + valid = symbolic_validate_impl( + exec, system_matrix->get_const_row_ptrs(), + system_matrix->get_const_col_idxs(), factors->get_const_row_ptrs(), + factors->get_const_col_idxs(), + static_cast(system_matrix->get_size()[0])); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL); + + } // namespace factorization } // namespace reference } // namespace kernels diff --git a/reference/test/factorization/lu_kernels.cpp b/reference/test/factorization/lu_kernels.cpp index 1ea77665f69..a0358a6f23d 100644 --- a/reference/test/factorization/lu_kernels.cpp +++ b/reference/test/factorization/lu_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,6 +20,7 @@ #include "core/components/prefix_sum_kernels.hpp" #include "core/factorization/cholesky_kernels.hpp" #include "core/factorization/elimination_forest.hpp" +#include "core/factorization/factorization_kernels.hpp" #include "core/factorization/symbolic.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" @@ -329,3 +330,63 @@ TYPED_TEST(Lu, FactorizeWithKnownSparsityWorks) ASSERT_EQ(lu->get_diagonal(), nullptr); }); } + + +TYPED_TEST(Lu, ValidateValidFactors) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = false; + + gko::kernels::reference::factorization::symbolic_validate( + this->ref, this->mtx.get(), this->mtx_lu.get(), valid); + + ASSERT_TRUE(valid); + }); +} + + +TYPED_TEST(Lu, ValidateInvalidFactorsMissing) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = false; + gko::matrix_data data; + this->mtx_lu->write(data); + // delete a random entry somewhere in the middle of the matrix + data.nonzeros.erase(data.nonzeros.begin() + + data.nonzeros.size() * 3 / 4); + this->mtx_lu->read(data); + + gko::kernels::reference::factorization::symbolic_validate( + this->ref, this->mtx.get(), this->mtx_lu.get(), valid); + + ASSERT_FALSE(valid); + }); +} + + +TYPED_TEST(Lu, ValidateInvalidFactorsExtra) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = false; + gko::matrix_data data; + this->mtx_lu->write(data); + const auto it = std::adjacent_find( + data.nonzeros.begin() + data.nonzeros.size() / 5, + data.nonzeros.end(), [](auto a, auto b) { + return a.row == b.row && a.column < b.column - 1; + }); + data.nonzeros.insert(it, {it->row, it->column + 1, it->value}); + this->mtx_lu->read(data); + + gko::kernels::reference::factorization::symbolic_validate( + this->ref, this->mtx.get(), this->mtx_lu.get(), valid); + + ASSERT_FALSE(valid); + }); +} From 40a2dd711f7b8dce5cfb8b044a9292d7c4f4b108 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 12 Jan 2025 11:10:55 +0100 Subject: [PATCH 2/6] extract csr lookup creation --- core/CMakeLists.txt | 1 + core/factorization/cholesky.cpp | 35 ++----- core/factorization/ic.cpp | 33 ++----- core/factorization/ilu.cpp | 32 ++----- core/factorization/lu.cpp | 35 ++----- core/factorization/symbolic.cpp | 27 +----- core/matrix/csr_lookup.cpp | 52 +++++++++++ core/matrix/csr_lookup.hpp | 23 ++++- .../test/factorization/cholesky_kernels.cpp | 42 +++------ test/factorization/cholesky_kernels.cpp | 92 +++++++------------ test/factorization/lu_kernels.cpp | 82 +++++++---------- 11 files changed, 194 insertions(+), 260 deletions(-) create mode 100644 core/matrix/csr_lookup.cpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 7901edf5341..a4e5647540f 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -78,6 +78,7 @@ target_sources(${ginkgo_core} matrix/batch_identity.cpp matrix/coo.cpp matrix/csr.cpp + matrix/csr_lookup.cpp matrix/dense.cpp matrix/diagonal.cpp matrix/ell.cpp diff --git a/core/factorization/cholesky.cpp b/core/factorization/cholesky.cpp index 92d598f0bd7..ce189e3bbdc 100644 --- a/core/factorization/cholesky.cpp +++ b/core/factorization/cholesky.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -27,8 +27,6 @@ namespace { GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); -GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); GKO_REGISTER_OPERATION(forest_from_factor, cholesky::forest_from_factor); GKO_REGISTER_OPERATION(initialize, cholesky::initialize); GKO_REGISTER_OPERATION(factorize, cholesky::factorize); @@ -107,37 +105,24 @@ std::unique_ptr Cholesky::generate_impl( exec->run(make_forest_from_factor(factors.get(), *forest)); } // setup lookup structure on factors - array storage_offsets{exec, num_rows + 1}; - array row_descs{exec, num_rows}; + const auto lookup = matrix::csr::build_lookup(factors.get()); array diag_idxs{exec, num_rows}; array transpose_idxs{exec, factors->get_num_stored_elements()}; - const auto allowed_sparsity = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - exec->run(make_build_lookup_offsets( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows, - allowed_sparsity, storage_offsets.get_data())); - const auto storage_size = - static_cast(get_element(storage_offsets, num_rows)); - array storage{exec, storage_size}; - exec->run(make_build_lookup( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows, - allowed_sparsity, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data())); // initialize factors exec->run(make_fill_array(factors->get_values(), factors->get_num_stored_elements(), zero())); - exec->run(make_initialize(mtx.get(), storage_offsets.get_const_data(), - row_descs.get_const_data(), - storage.get_const_data(), diag_idxs.get_data(), - transpose_idxs.get_data(), factors.get())); + exec->run(make_initialize( + mtx.get(), lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), + diag_idxs.get_data(), transpose_idxs.get_data(), factors.get())); // run numerical factorization array tmp{exec}; exec->run(make_factorize( - storage_offsets.get_const_data(), row_descs.get_const_data(), - storage.get_const_data(), diag_idxs.get_const_data(), - transpose_idxs.get_const_data(), *forest, factors.get(), true, tmp)); + lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), + diag_idxs.get_const_data(), transpose_idxs.get_const_data(), *forest, + factors.get(), true, tmp)); return factorization_type::create_from_combined_cholesky( std::move(factors)); } diff --git a/core/factorization/ic.cpp b/core/factorization/ic.cpp index bf9d5e7bbf4..46d45ba5f04 100644 --- a/core/factorization/ic.cpp +++ b/core/factorization/ic.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -20,7 +20,6 @@ #include "core/factorization/elimination_forest.hpp" #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/ic_kernels.hpp" -#include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" @@ -38,8 +37,6 @@ GKO_REGISTER_OPERATION(initialize_row_ptrs_l, GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l); // for gko syncfree implementation GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); -GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); GKO_REGISTER_OPERATION(forest_from_factor, cholesky::forest_from_factor); GKO_REGISTER_OPERATION(initialize, cholesky::initialize); GKO_REGISTER_OPERATION(factorize, cholesky::factorize); @@ -126,39 +123,25 @@ std::unique_ptr> Ic::generate( ic_factorization::make_forest_from_factor(factors.get(), *forest)); // setup lookup structure on factors - array storage_offsets{exec, num_rows + 1}; - array row_descs{exec, num_rows}; + const auto lookup = matrix::csr::build_lookup(factors.get()); array diag_idxs{exec, num_rows}; array transpose_idxs{exec, factors->get_num_stored_elements()}; - const auto allowed_sparsity = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - exec->run(ic_factorization::make_build_lookup_offsets( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), - num_rows, allowed_sparsity, storage_offsets.get_data())); - const auto storage_size = - static_cast(get_element(storage_offsets, num_rows)); - array storage{exec, storage_size}; - exec->run(ic_factorization::make_build_lookup( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), - num_rows, allowed_sparsity, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data())); // initialize factors exec->run(ic_factorization::make_fill_array( factors->get_values(), factors->get_num_stored_elements(), zero())); exec->run(ic_factorization::make_initialize( - local_system_matrix.get(), storage_offsets.get_const_data(), - row_descs.get_const_data(), storage.get_const_data(), + local_system_matrix.get(), lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), diag_idxs.get_data(), transpose_idxs.get_data(), factors.get())); // run numerical factorization array tmp{exec}; exec->run(ic_factorization::make_factorize( - storage_offsets.get_const_data(), row_descs.get_const_data(), - storage.get_const_data(), diag_idxs.get_const_data(), - transpose_idxs.get_const_data(), *forest, factors.get(), false, - tmp)); + lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), + diag_idxs.get_const_data(), transpose_idxs.get_const_data(), + *forest, factors.get(), false, tmp)); ic = factors; } else if (std::dynamic_pointer_cast(exec) && !std::dynamic_pointer_cast(exec)) { diff --git a/core/factorization/ilu.cpp b/core/factorization/ilu.cpp index f7703f3d20b..c865a15c261 100644 --- a/core/factorization/ilu.cpp +++ b/core/factorization/ilu.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -19,8 +19,6 @@ #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/ilu_kernels.hpp" #include "core/factorization/lu_kernels.hpp" -#include "core/factorization/par_ilu_kernels.hpp" -#include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" @@ -38,8 +36,6 @@ GKO_REGISTER_OPERATION(initialize_row_ptrs_l_u, GKO_REGISTER_OPERATION(initialize_l_u, factorization::initialize_l_u); // for gko syncfree implementation GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); -GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); GKO_REGISTER_OPERATION(initialize, lu_factorization::initialize); GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize); @@ -117,32 +113,18 @@ std::unique_ptr> Ilu::generate_l_u( factors->set_strategy(factors->get_strategy()); // setup lookup structure on factors - array storage_offsets{exec, num_rows + 1}; - array row_descs{exec, num_rows}; + const auto lookup = matrix::csr::build_lookup(factors.get()); array diag_idxs{exec, num_rows}; - const auto allowed_sparsity = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - exec->run(ilu_factorization::make_build_lookup_offsets( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), - num_rows, allowed_sparsity, storage_offsets.get_data())); - const auto storage_size = - static_cast(get_element(storage_offsets, num_rows)); - array storage{exec, storage_size}; - exec->run(ilu_factorization::make_build_lookup( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), - num_rows, allowed_sparsity, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data())); exec->run(ilu_factorization::make_initialize( - local_system_matrix.get(), storage_offsets.get_const_data(), - row_descs.get_const_data(), storage.get_const_data(), + local_system_matrix.get(), lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), diag_idxs.get_data(), factors.get())); // run numerical factorization array tmp{exec}; exec->run(ilu_factorization::make_factorize( - storage_offsets.get_const_data(), row_descs.get_const_data(), - storage.get_const_data(), diag_idxs.get_const_data(), factors.get(), - false, tmp)); + lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), + diag_idxs.get_const_data(), factors.get(), false, tmp)); ilu = factors; } else if (std::dynamic_pointer_cast(exec) && !std::dynamic_pointer_cast(exec)) { diff --git a/core/factorization/lu.cpp b/core/factorization/lu.cpp index 4feb78083d2..7811ae0ba0c 100644 --- a/core/factorization/lu.cpp +++ b/core/factorization/lu.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -10,13 +10,11 @@ #include #include -#include "core/base/array_access.hpp" #include "core/components/fill_array_kernels.hpp" #include "core/config/config_helper.hpp" #include "core/factorization/elimination_forest.hpp" #include "core/factorization/lu_kernels.hpp" #include "core/factorization/symbolic.hpp" -#include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" @@ -27,8 +25,6 @@ namespace { GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); -GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); GKO_REGISTER_OPERATION(initialize, lu_factorization::initialize); GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize); GKO_REGISTER_HOST_OPERATION(symbolic_cholesky, @@ -135,31 +131,18 @@ std::unique_ptr Lu::generate_impl( factors->set_strategy(factors->get_strategy()); } // setup lookup structure on factors - array storage_offsets{exec, num_rows + 1}; - array row_descs{exec, num_rows}; + const auto lookup = matrix::csr::build_lookup(factors.get()); array diag_idxs{exec, num_rows}; - const auto allowed_sparsity = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - exec->run(make_build_lookup_offsets( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows, - allowed_sparsity, storage_offsets.get_data())); - const auto storage_size = - static_cast(get_element(storage_offsets, num_rows)); - array storage{exec, storage_size}; - exec->run(make_build_lookup( - factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows, - allowed_sparsity, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data())); exec->run(make_initialize( - mtx.get(), storage_offsets.get_const_data(), row_descs.get_const_data(), - storage.get_const_data(), diag_idxs.get_data(), factors.get())); + mtx.get(), lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), + diag_idxs.get_data(), factors.get())); // run numerical factorization array tmp{exec}; - exec->run( - make_factorize(storage_offsets.get_const_data(), - row_descs.get_const_data(), storage.get_const_data(), - diag_idxs.get_const_data(), factors.get(), true, tmp)); + exec->run(make_factorize( + lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), + diag_idxs.get_const_data(), factors.get(), true, tmp)); return factorization_type::create_from_combined_lu(std::move(factors)); } diff --git a/core/factorization/symbolic.cpp b/core/factorization/symbolic.cpp index 23f6b94cc14..b08602e1a1a 100644 --- a/core/factorization/symbolic.cpp +++ b/core/factorization/symbolic.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -16,7 +16,6 @@ #include "core/factorization/cholesky_kernels.hpp" #include "core/factorization/elimination_forest.hpp" #include "core/factorization/lu_kernels.hpp" -#include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" @@ -27,8 +26,6 @@ namespace { GKO_REGISTER_OPERATION(symbolic_count, cholesky::symbolic_count); GKO_REGISTER_OPERATION(symbolic, cholesky::symbolic_factorize); -GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); -GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); GKO_REGISTER_OPERATION(prefix_sum_nonnegative, components::prefix_sum_nonnegative); GKO_REGISTER_OPERATION(symbolic_factorize_simple, @@ -115,29 +112,15 @@ void symbolic_lu_near_symm( symbolic_cholesky(symm_mtx.get(), true, symm_factors, forest); } // build lookup structure - array storage_offsets{exec, size[0] + 1}; - array row_descs{exec, size[0]}; + const auto lookup = matrix::csr::build_lookup(symm_factors.get()); array diag_idxs{exec, size[0]}; - const auto allowed_sparsity = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - exec->run(make_build_lookup_offsets( - symm_factors->get_const_row_ptrs(), symm_factors->get_const_col_idxs(), - size[0], allowed_sparsity, storage_offsets.get_data())); - const auto storage_size = - static_cast(get_element(storage_offsets, size[0])); - array storage{exec, storage_size}; - exec->run(make_build_lookup( - symm_factors->get_const_row_ptrs(), symm_factors->get_const_col_idxs(), - size[0], allowed_sparsity, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data())); // compute "numerical" factorization with 1s and 0s array factor_row_ptrs{exec, size[0] + 1}; exec->run(make_symbolic_factorize_simple( mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - storage_offsets.get_const_data(), row_descs.get_const_data(), - storage.get_const_data(), symm_factors.get(), - factor_row_ptrs.get_data())); + lookup.storage_offsets.get_const_data(), + lookup.row_descs.get_const_data(), lookup.storage.get_const_data(), + symm_factors.get(), factor_row_ptrs.get_data())); // build row pointers from nnz exec->run( make_prefix_sum_nonnegative(factor_row_ptrs.get_data(), size[0] + 1)); diff --git a/core/matrix/csr_lookup.cpp b/core/matrix/csr_lookup.cpp new file mode 100644 index 00000000000..4bea0716f9d --- /dev/null +++ b/core/matrix/csr_lookup.cpp @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/csr_lookup.hpp" + +#include "core/base/array_access.hpp" +#include "core/matrix/csr_kernels.hpp" + +namespace gko { +namespace matrix { +namespace csr { +namespace { + + +GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); +GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); + + +} // namespace + + +template +lookup_data build_lookup(const Csr* mtx, + sparsity_type allowed_sparsity) +{ + const auto exec = mtx->get_executor(); + const auto size = mtx->get_size()[0]; + lookup_data result{exec, size}; + exec->run(make_build_lookup_offsets( + mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), size, + allowed_sparsity, result.storage_offsets.get_data())); + const auto storage_size = get_element(result.storage_offsets, size); + result.storage.resize_and_reset(static_cast(storage_size)); + exec->run(make_build_lookup( + mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), size, + allowed_sparsity, result.storage_offsets.get_const_data(), + result.row_descs.get_data(), result.storage.get_data())); + return result; +} + + +#define GKO_INSTANTIATE_BUILD_LOOKUP(ValueType, IndexType) \ + lookup_data build_lookup(const Csr* mtx, \ + sparsity_type allowed_sparsity) + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_INSTANTIATE_BUILD_LOOKUP); + + +} // namespace csr +} // namespace matrix +} // namespace gko diff --git a/core/matrix/csr_lookup.hpp b/core/matrix/csr_lookup.hpp index 50d9c087b68..2aa18f67061 100644 --- a/core/matrix/csr_lookup.hpp +++ b/core/matrix/csr_lookup.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -327,6 +327,27 @@ struct device_sparsity_lookup { }; +template +struct lookup_data { + explicit lookup_data(std::shared_ptr exec, + size_type size = 0) + : storage_offsets{exec, size + 1}, row_descs{exec, size}, storage{exec} + {} + + gko::array storage_offsets; + gko::array row_descs; + gko::array storage; +}; + + +template +lookup_data build_lookup( + const Csr* mtx, + sparsity_type allowed_sparsity = sparsity_type::full | + sparsity_type::bitmap | + sparsity_type::hash); + + } // namespace csr } // namespace matrix } // namespace gko diff --git a/reference/test/factorization/cholesky_kernels.cpp b/reference/test/factorization/cholesky_kernels.cpp index b4c33d76ab9..630aa2d0406 100644 --- a/reference/test/factorization/cholesky_kernels.cpp +++ b/reference/test/factorization/cholesky_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -42,9 +42,7 @@ class Cholesky : public ::testing::Test { : ref(gko::ReferenceExecutor::create()), tmp{ref}, ref_row_nnz{ref}, - storage_offsets{ref}, - storage{ref}, - row_descs{ref} + lookup{ref} {} std::unique_ptr combined_factor( @@ -103,8 +101,6 @@ class Cholesky : public ::testing::Test { combined->get_row_ptrs()); ref->copy(combined_ref->get_num_stored_elements(), combined_ref->get_const_col_idxs(), combined->get_col_idxs()); - storage_offsets.resize_and_reset(num_rows + 1); - row_descs.resize_and_reset(num_rows); ref_row_nnz.resize_and_reset(num_rows); const auto ref_row_ptrs = l_factor_ref->get_const_row_ptrs(); @@ -113,17 +109,7 @@ class Cholesky : public ::testing::Test { ref_row_ptrs[row + 1] - ref_row_ptrs[row]; } - const auto allowed = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - gko::kernels::reference::csr::build_lookup_offsets( - ref, combined->get_const_row_ptrs(), combined->get_const_col_idxs(), - num_rows, allowed, storage_offsets.get_data()); - storage.resize_and_reset(storage_offsets.get_const_data()[num_rows]); - gko::kernels::reference::csr::build_lookup( - ref, combined->get_const_row_ptrs(), combined->get_const_col_idxs(), - num_rows, allowed, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data()); + lookup = gko::matrix::csr::build_lookup(combined.get()); } void assert_equal_forests(elimination_forest& lhs, elimination_forest& rhs, @@ -234,9 +220,7 @@ class Cholesky : public ::testing::Test { gko::size_type num_rows; gko::array tmp; gko::array ref_row_nnz; - gko::array storage_offsets; - gko::array storage; - gko::array row_descs; + gko::matrix::csr::lookup_data lookup; std::shared_ptr mtx; std::unique_ptr forest; std::shared_ptr l_factor; @@ -369,9 +353,9 @@ TYPED_TEST(Cholesky, KernelInitializeWorks) gko::kernels::reference::cholesky::initialize( this->ref, this->mtx.get(), - this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), - this->storage.get_const_data(), diag_idxs.get_data(), + this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_data(), transpose_idxs.get_data(), this->combined.get()); GKO_ASSERT_MTX_NEAR(this->mtx, this->combined, 0.0); @@ -413,15 +397,15 @@ TYPED_TEST(Cholesky, KernelFactorizeWorks) gko::array tmp{this->ref}; gko::kernels::reference::cholesky::initialize( this->ref, this->mtx.get(), - this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), - this->storage.get_const_data(), diag_idxs.get_data(), + this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_data(), transpose_idxs.get_data(), this->combined.get()); gko::kernels::reference::cholesky::factorize( - this->ref, this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), - this->storage.get_const_data(), diag_idxs.get_data(), + this->ref, this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_data(), transpose_idxs.get_data(), *this->forest, this->combined.get(), true, tmp); diff --git a/test/factorization/cholesky_kernels.cpp b/test/factorization/cholesky_kernels.cpp index 1b2d187c785..ae2894d3696 100644 --- a/test/factorization/cholesky_kernels.cpp +++ b/test/factorization/cholesky_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -18,7 +18,6 @@ #include "core/components/prefix_sum_kernels.hpp" #include "core/factorization/elimination_forest.hpp" #include "core/factorization/symbolic.hpp" -#include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" #include "core/test/utils.hpp" #include "core/test/utils/assertions.hpp" @@ -255,14 +254,7 @@ class Cholesky : public CommonTestFixture { using elimination_forest = gko::factorization::elimination_forest; - Cholesky() - : storage_offsets{ref}, - dstorage_offsets{exec}, - storage{ref}, - dstorage{exec}, - row_descs{ref}, - drow_descs{exec} - {} + Cholesky() : lookup{ref}, dlookup{exec} {} void initialize_data(const char* mtx_filename, const char* mtx_chol_filename) @@ -280,23 +272,9 @@ class Cholesky : public CommonTestFixture { mtx_chol_data.sort_row_major(); mtx_chol = matrix_type::create(ref); mtx_chol->read(mtx_chol_data); - storage_offsets.resize_and_reset(num_rows + 1); - row_descs.resize_and_reset(num_rows); - - const auto allowed = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - gko::kernels::reference::csr::build_lookup_offsets( - ref, mtx_chol->get_const_row_ptrs(), mtx_chol->get_const_col_idxs(), - num_rows, allowed, storage_offsets.get_data()); - storage.resize_and_reset(storage_offsets.get_const_data()[num_rows]); - gko::kernels::reference::csr::build_lookup( - ref, mtx_chol->get_const_row_ptrs(), mtx_chol->get_const_col_idxs(), - num_rows, allowed, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data()); - dstorage_offsets = storage_offsets; - dstorage = storage; - drow_descs = row_descs; + lookup = gko::matrix::csr::build_lookup(mtx_chol.get()); + + dlookup = lookup; dmtx_chol = gko::clone(exec, mtx_chol); mtx_chol_sparsity = sparsity_pattern_type::create(ref); mtx_chol_sparsity->copy_from(mtx_chol.get()); @@ -343,12 +321,8 @@ class Cholesky : public CommonTestFixture { std::shared_ptr dmtx_chol; std::unique_ptr dforest; std::shared_ptr dmtx_chol_sparsity; - gko::array storage_offsets; - gko::array dstorage_offsets; - gko::array storage; - gko::array dstorage; - gko::array row_descs; - gko::array drow_descs; + gko::matrix::csr::lookup_data lookup; + gko::matrix::csr::lookup_data dlookup; }; TYPED_TEST_SUITE(Cholesky, Types, PairTypenameNameGenerator); @@ -370,16 +344,17 @@ TYPED_TEST(Cholesky, KernelInitializeIsEquivalentToRef) gko::array dtranspose_idxs{this->exec, nnz}; gko::kernels::reference::cholesky::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), - this->mtx_chol.get()); + this->ref, this->mtx.get(), + this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), this->mtx_chol.get()); gko::kernels::GKO_DEVICE_NAMESPACE::cholesky::initialize( this->exec, this->dmtx.get(), - this->dstorage_offsets.get_const_data(), - this->drow_descs.get_const_data(), this->dstorage.get_const_data(), - ddiag_idxs.get_data(), dtranspose_idxs.get_data(), - this->dmtx_chol.get()); + this->dlookup.storage_offsets.get_const_data(), + this->dlookup.row_descs.get_const_data(), + this->dlookup.storage.get_const_data(), ddiag_idxs.get_data(), + dtranspose_idxs.get_data(), this->dmtx_chol.get()); GKO_ASSERT_MTX_NEAR(this->dmtx_chol, this->dmtx_chol, 0.0); GKO_ASSERT_ARRAY_EQ(diag_idxs, ddiag_idxs); @@ -400,27 +375,30 @@ TYPED_TEST(Cholesky, KernelFactorizeIsEquivalentToRef) gko::array tmp{this->ref}; gko::array dtmp{this->exec}; gko::kernels::reference::cholesky::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), - this->mtx_chol.get()); + this->ref, this->mtx.get(), + this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), this->mtx_chol.get()); gko::kernels::GKO_DEVICE_NAMESPACE::cholesky::initialize( this->exec, this->dmtx.get(), - this->dstorage_offsets.get_const_data(), - this->drow_descs.get_const_data(), this->dstorage.get_const_data(), - ddiag_idxs.get_data(), dtranspose_idxs.get_data(), - this->dmtx_chol.get()); + this->dlookup.storage_offsets.get_const_data(), + this->dlookup.row_descs.get_const_data(), + this->dlookup.storage.get_const_data(), ddiag_idxs.get_data(), + dtranspose_idxs.get_data(), this->dmtx_chol.get()); gko::kernels::reference::cholesky::factorize( - this->ref, this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_const_data(), transpose_idxs.get_const_data(), - *this->forest, this->mtx_chol.get(), true, tmp); + this->ref, this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_const_data(), + transpose_idxs.get_const_data(), *this->forest, + this->mtx_chol.get(), true, tmp); gko::kernels::GKO_DEVICE_NAMESPACE::cholesky::factorize( - this->exec, this->dstorage_offsets.get_const_data(), - this->drow_descs.get_const_data(), this->dstorage.get_const_data(), - ddiag_idxs.get_const_data(), dtranspose_idxs.get_const_data(), - *this->dforest, this->dmtx_chol.get(), true, dtmp); + this->exec, this->dlookup.storage_offsets.get_const_data(), + this->dlookup.row_descs.get_const_data(), + this->dlookup.storage.get_const_data(), ddiag_idxs.get_const_data(), + dtranspose_idxs.get_const_data(), *this->dforest, + this->dmtx_chol.get(), true, dtmp); GKO_ASSERT_MTX_NEAR(this->mtx_chol, this->dmtx_chol, r::value); diff --git a/test/factorization/lu_kernels.cpp b/test/factorization/lu_kernels.cpp index 4fe974200dc..739e8907cab 100644 --- a/test/factorization/lu_kernels.cpp +++ b/test/factorization/lu_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -44,14 +44,7 @@ class Lu : public CommonTestFixture { using matrix_type = typename factory_type::matrix_type; using sparsity_pattern_type = typename factory_type::sparsity_pattern_type; - Lu() - : storage_offsets{ref}, - dstorage_offsets{exec}, - storage{ref}, - dstorage{exec}, - row_descs{ref}, - drow_descs{exec} - {} + Lu() : lookup{ref}, dlookup{exec} {} void initialize_data(const char* mtx_filename, const char* mtx_lu_filename) { @@ -61,23 +54,8 @@ class Lu : public CommonTestFixture { num_rows = mtx->get_size()[0]; std::ifstream s_mtx_lu{mtx_lu_filename}; mtx_lu = gko::read(s_mtx_lu, ref); - storage_offsets.resize_and_reset(num_rows + 1); - row_descs.resize_and_reset(num_rows); - - const auto allowed = gko::matrix::csr::sparsity_type::bitmap | - gko::matrix::csr::sparsity_type::full | - gko::matrix::csr::sparsity_type::hash; - gko::kernels::reference::csr::build_lookup_offsets( - ref, mtx_lu->get_const_row_ptrs(), mtx_lu->get_const_col_idxs(), - num_rows, allowed, storage_offsets.get_data()); - storage.resize_and_reset(storage_offsets.get_const_data()[num_rows]); - gko::kernels::reference::csr::build_lookup( - ref, mtx_lu->get_const_row_ptrs(), mtx_lu->get_const_col_idxs(), - num_rows, allowed, storage_offsets.get_const_data(), - row_descs.get_data(), storage.get_data()); - dstorage_offsets = storage_offsets; - dstorage = storage; - drow_descs = row_descs; + lookup = gko::matrix::csr::build_lookup(mtx_lu.get()); + dlookup = lookup; dmtx_lu = gko::clone(exec, mtx_lu); mtx_lu_sparsity = sparsity_pattern_type::create(ref); mtx_lu_sparsity->copy_from(mtx_lu); @@ -120,12 +98,8 @@ class Lu : public CommonTestFixture { std::shared_ptr dmtx; std::shared_ptr dmtx_lu; std::shared_ptr dmtx_lu_sparsity; - gko::array storage_offsets; - gko::array dstorage_offsets; - gko::array storage; - gko::array dstorage; - gko::array row_descs; - gko::array drow_descs; + gko::matrix::csr::lookup_data lookup; + gko::matrix::csr::lookup_data dlookup; }; #ifdef GKO_COMPILING_OMP @@ -159,14 +133,17 @@ TYPED_TEST(Lu, KernelInitializeIsEquivalentToRef) gko::array ddiag_idxs{this->exec, this->num_rows}; gko::kernels::reference::lu_factorization::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), this->mtx_lu.get()); + this->ref, this->mtx.get(), + this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_data(), + this->mtx_lu.get()); gko::kernels::GKO_DEVICE_NAMESPACE::lu_factorization::initialize( this->exec, this->dmtx.get(), - this->dstorage_offsets.get_const_data(), - this->drow_descs.get_const_data(), this->dstorage.get_const_data(), - ddiag_idxs.get_data(), this->dmtx_lu.get()); + this->dlookup.storage_offsets.get_const_data(), + this->dlookup.row_descs.get_const_data(), + this->dlookup.storage.get_const_data(), ddiag_idxs.get_data(), + this->dmtx_lu.get()); GKO_ASSERT_MTX_NEAR(this->dmtx_lu, this->dmtx_lu, 0.0); GKO_ASSERT_ARRAY_EQ(diag_idxs, ddiag_idxs); @@ -184,23 +161,28 @@ TYPED_TEST(Lu, KernelFactorizeIsEquivalentToRef) gko::array tmp{this->ref}; gko::array dtmp{this->exec}; gko::kernels::reference::lu_factorization::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), this->mtx_lu.get()); + this->ref, this->mtx.get(), + this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_data(), + this->mtx_lu.get()); gko::kernels::GKO_DEVICE_NAMESPACE::lu_factorization::initialize( this->exec, this->dmtx.get(), - this->dstorage_offsets.get_const_data(), - this->drow_descs.get_const_data(), this->dstorage.get_const_data(), - ddiag_idxs.get_data(), this->dmtx_lu.get()); + this->dlookup.storage_offsets.get_const_data(), + this->dlookup.row_descs.get_const_data(), + this->dlookup.storage.get_const_data(), ddiag_idxs.get_data(), + this->dmtx_lu.get()); gko::kernels::reference::lu_factorization::factorize( - this->ref, this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_const_data(), this->mtx_lu.get(), true, tmp); + this->ref, this->lookup.storage_offsets.get_const_data(), + this->lookup.row_descs.get_const_data(), + this->lookup.storage.get_const_data(), diag_idxs.get_const_data(), + this->mtx_lu.get(), true, tmp); gko::kernels::GKO_DEVICE_NAMESPACE::lu_factorization::factorize( - this->exec, this->dstorage_offsets.get_const_data(), - this->drow_descs.get_const_data(), this->dstorage.get_const_data(), - ddiag_idxs.get_const_data(), this->dmtx_lu.get(), true, dtmp); + this->exec, this->dlookup.storage_offsets.get_const_data(), + this->dlookup.row_descs.get_const_data(), + this->dlookup.storage.get_const_data(), ddiag_idxs.get_const_data(), + this->dmtx_lu.get(), true, dtmp); GKO_ASSERT_MTX_NEAR(this->mtx_lu, this->dmtx_lu, r::value); }); From eb24d39e0e28093235d1117db20d2577b3f26c80 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 12 Jan 2025 12:27:38 +0100 Subject: [PATCH 3/6] add device kernels for validation --- .../factorization/factorization_kernels.cpp | 110 +++++++++++++++++- core/factorization/factorization_kernels.hpp | 5 +- .../factorization_kernels.dp.cpp | 10 +- omp/factorization/factorization_kernels.cpp | 9 +- .../factorization/factorization_kernels.cpp | 9 +- reference/test/factorization/lu_kernels.cpp | 37 +++++- test/factorization/lu_kernels.cpp | 91 +++++++++++++++ 7 files changed, 249 insertions(+), 22 deletions(-) diff --git a/common/cuda_hip/factorization/factorization_kernels.cpp b/common/cuda_hip/factorization/factorization_kernels.cpp index 758a91bd71d..80a07c2a32d 100644 --- a/common/cuda_hip/factorization/factorization_kernels.cpp +++ b/common/cuda_hip/factorization/factorization_kernels.cpp @@ -4,11 +4,14 @@ #include "core/factorization/factorization_kernels.hpp" +#include + #include #include "common/cuda_hip/base/config.hpp" #include "common/cuda_hip/base/math.hpp" #include "common/cuda_hip/base/runtime.hpp" +#include "common/cuda_hip/base/thrust.hpp" #include "common/cuda_hip/base/types.hpp" #include "common/cuda_hip/components/cooperative_groups.hpp" #include "common/cuda_hip/components/intrinsics.hpp" @@ -16,6 +19,7 @@ #include "common/cuda_hip/components/thread_ids.hpp" #include "common/cuda_hip/factorization/factorization_helpers.hpp" #include "core/base/array_access.hpp" +#include "core/components/fill_array_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/csr_builder.hpp" @@ -277,6 +281,73 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_l_row( } +template +__global__ __launch_bounds__(default_block_size) void symbolic_validate( + const IndexType* __restrict__ mtx_row_ptrs, + const IndexType* __restrict__ mtx_cols, + const IndexType* __restrict__ factor_row_ptrs, + const IndexType* __restrict__ factor_cols, size_type size, + const IndexType* __restrict__ storage_offsets, + const int64* __restrict__ row_descs, const int32* __restrict__ storage, + bool* __restrict__ found, bool* __restrict__ missing) +{ + const auto row = thread::get_subwarp_id_flat(); + if (row >= size) { + return; + } + const auto warp = + group::tiled_partition(group::this_thread_block()); + const auto lane = warp.thread_rank(); + gko::matrix::csr::device_sparsity_lookup lookup{ + factor_row_ptrs, factor_cols, storage_offsets, + storage, row_descs, static_cast(row)}; + const auto mtx_begin = mtx_row_ptrs[row]; + const auto mtx_end = mtx_row_ptrs[row + 1]; + const auto factor_begin = factor_row_ptrs[row]; + const auto factor_end = factor_row_ptrs[row + 1]; + bool local_missing = false; + const auto mark_found = [&](IndexType col) { + const auto local_idx = lookup[col]; + const auto idx = local_idx + factor_begin; + if (local_idx == invalid_index()) { + local_missing = true; + } + found[idx] = true; + }; + // check the original matrix is part of the factors + for (auto nz = mtx_begin + lane; nz < mtx_end; nz += config::warp_size) { + mark_found(mtx_cols[nz]); + } + // check the diagonal is part of the factors + if (lane == 0) { + mark_found(row); + } + // check it is a valid factorization + for (auto nz = factor_begin; nz < factor_end; nz++) { + const auto dep = factor_cols[nz]; + if (dep >= row) { + continue; + } + // for every lower triangular entry + const auto dep_begin = factor_row_ptrs[dep]; + const auto dep_end = factor_row_ptrs[dep + 1]; + for (auto dep_nz = dep_begin + lane; dep_nz < dep_end; + dep_nz += config::warp_size) { + const auto col = factor_cols[dep_nz]; + // check every upper triangular entry thereof is part of the + // factorization + if (col > dep) { + mark_found(col); + } + } + } + local_missing = warp.any(local_missing); + if (lane == 0) { + missing[row] = local_missing; + } +} + + } // namespace kernel @@ -488,10 +559,41 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void symbolic_validate(std::shared_ptr exec, - const matrix::Csr* system_matrix, - const matrix::Csr* factors, - bool& valid) GKO_NOT_IMPLEMENTED; +void symbolic_validate( + std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + const matrix::csr::lookup_data& factors_lookup, bool& valid) +{ + const auto size = system_matrix->get_size()[0]; + const auto row_ptrs = system_matrix->get_const_row_ptrs(); + const auto col_idxs = system_matrix->get_const_col_idxs(); + const auto factor_row_ptrs = factors->get_const_row_ptrs(); + const auto factor_col_idxs = factors->get_const_col_idxs(); + // this stores for each factor nonzero whether it occurred as part of the + // factorization. + array found(exec, factors->get_num_stored_elements()); + components::fill_array(exec, found.get_data(), found.get_size(), false); + // this stores for each row whether there were any elements missing + array missing(exec, size); + components::fill_array(exec, missing.get_data(), missing.get_size(), false); + if (size > 0) { + const auto num_blocks = + ceildiv(size, default_block_size / config::warp_size); + kernel::symbolic_validate<<>>( + row_ptrs, col_idxs, factor_row_ptrs, factor_col_idxs, size, + factors_lookup.storage_offsets.get_const_data(), + factors_lookup.row_descs.get_const_data(), + factors_lookup.storage.get_const_data(), found.get_data(), + missing.get_data()); + } + valid = thrust::all_of(thrust_policy(exec), found.get_const_data(), + found.get_const_data() + found.get_size(), + thrust::identity{}) && + !thrust::any_of(thrust_policy(exec), missing.get_const_data(), + missing.get_const_data() + missing.get_size(), + thrust::identity{}); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL); diff --git a/core/factorization/factorization_kernels.hpp b/core/factorization/factorization_kernels.hpp index fdfca1d2427..4619647a23e 100644 --- a/core/factorization/factorization_kernels.hpp +++ b/core/factorization/factorization_kernels.hpp @@ -13,6 +13,7 @@ #include #include "core/base/kernel_declaration.hpp" +#include "core/matrix/csr_lookup.hpp" namespace gko { @@ -57,7 +58,9 @@ namespace kernels { void symbolic_validate( \ std::shared_ptr exec, \ const matrix::Csr* system_matrix, \ - const matrix::Csr* factors, bool& valid) + const matrix::Csr* factors, \ + const matrix::csr::lookup_data& factors_lookup, \ + bool& valid) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/dpcpp/factorization/factorization_kernels.dp.cpp b/dpcpp/factorization/factorization_kernels.dp.cpp index 18749bbf886..8fd11b3a68a 100644 --- a/dpcpp/factorization/factorization_kernels.dp.cpp +++ b/dpcpp/factorization/factorization_kernels.dp.cpp @@ -591,10 +591,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void symbolic_validate(std::shared_ptr exec, - const matrix::Csr* system_matrix, - const matrix::Csr* factors, - bool& valid) GKO_NOT_IMPLEMENTED; +void symbolic_validate( + std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + const matrix::csr::lookup_data& factors_lookup, + bool& valid) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL); diff --git a/omp/factorization/factorization_kernels.cpp b/omp/factorization/factorization_kernels.cpp index a6a59f7dbc3..28f772532e7 100644 --- a/omp/factorization/factorization_kernels.cpp +++ b/omp/factorization/factorization_kernels.cpp @@ -339,10 +339,11 @@ bool symbolic_validate_impl(std::shared_ptr exec, } template -void symbolic_validate(std::shared_ptr exec, - const matrix::Csr* system_matrix, - const matrix::Csr* factors, - bool& valid) +void symbolic_validate( + std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + const matrix::csr::lookup_data& factors_lookup, bool& valid) { valid = symbolic_validate_impl( exec, system_matrix->get_const_row_ptrs(), diff --git a/reference/factorization/factorization_kernels.cpp b/reference/factorization/factorization_kernels.cpp index 6388f62adcc..b07cb9d783a 100644 --- a/reference/factorization/factorization_kernels.cpp +++ b/reference/factorization/factorization_kernels.cpp @@ -274,10 +274,11 @@ bool symbolic_validate_impl(std::shared_ptr exec, } template -void symbolic_validate(std::shared_ptr exec, - const matrix::Csr* system_matrix, - const matrix::Csr* factors, - bool& valid) +void symbolic_validate( + std::shared_ptr exec, + const matrix::Csr* system_matrix, + const matrix::Csr* factors, + const matrix::csr::lookup_data& factors_lookup, bool& valid) { valid = symbolic_validate_impl( exec, system_matrix->get_const_row_ptrs(), diff --git a/reference/test/factorization/lu_kernels.cpp b/reference/test/factorization/lu_kernels.cpp index a0358a6f23d..00d4aefab70 100644 --- a/reference/test/factorization/lu_kernels.cpp +++ b/reference/test/factorization/lu_kernels.cpp @@ -17,6 +17,7 @@ #include #include +#include "core/base/index_range.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/factorization/cholesky_kernels.hpp" #include "core/factorization/elimination_forest.hpp" @@ -340,19 +341,43 @@ TYPED_TEST(Lu, ValidateValidFactors) bool valid = false; gko::kernels::reference::factorization::symbolic_validate( - this->ref, this->mtx.get(), this->mtx_lu.get(), valid); + this->ref, this->mtx.get(), this->mtx_lu.get(), + gko::matrix::csr::build_lookup(this->mtx_lu.get()), valid); ASSERT_TRUE(valid); }); } +TYPED_TEST(Lu, ValidateInvalidFactorsIdentity) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = true; + gko::matrix_data data(this->mtx_lu->get_size()); + // an identity matrix is a valid factorization, but doesn't contain the + // system matrix + for (auto row : gko::irange{static_cast(data.size[0])}) { + data.nonzeros.emplace_back(row, row, gko::one()); + } + this->mtx_lu->read(data); + + gko::kernels::reference::factorization::symbolic_validate( + this->ref, this->mtx.get(), this->mtx_lu.get(), + gko::matrix::csr::build_lookup(this->mtx_lu.get()), valid); + + ASSERT_FALSE(valid); + }); +} + + TYPED_TEST(Lu, ValidateInvalidFactorsMissing) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; this->forall_matrices([this] { - bool valid = false; + bool valid = true; gko::matrix_data data; this->mtx_lu->write(data); // delete a random entry somewhere in the middle of the matrix @@ -361,7 +386,8 @@ TYPED_TEST(Lu, ValidateInvalidFactorsMissing) this->mtx_lu->read(data); gko::kernels::reference::factorization::symbolic_validate( - this->ref, this->mtx.get(), this->mtx_lu.get(), valid); + this->ref, this->mtx.get(), this->mtx_lu.get(), + gko::matrix::csr::build_lookup(this->mtx_lu.get()), valid); ASSERT_FALSE(valid); }); @@ -373,7 +399,7 @@ TYPED_TEST(Lu, ValidateInvalidFactorsExtra) using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; this->forall_matrices([this] { - bool valid = false; + bool valid = true; gko::matrix_data data; this->mtx_lu->write(data); const auto it = std::adjacent_find( @@ -385,7 +411,8 @@ TYPED_TEST(Lu, ValidateInvalidFactorsExtra) this->mtx_lu->read(data); gko::kernels::reference::factorization::symbolic_validate( - this->ref, this->mtx.get(), this->mtx_lu.get(), valid); + this->ref, this->mtx.get(), this->mtx_lu.get(), + gko::matrix::csr::build_lookup(this->mtx_lu.get()), valid); ASSERT_FALSE(valid); }); diff --git a/test/factorization/lu_kernels.cpp b/test/factorization/lu_kernels.cpp index 739e8907cab..d1d6feaea26 100644 --- a/test/factorization/lu_kernels.cpp +++ b/test/factorization/lu_kernels.cpp @@ -19,10 +19,12 @@ #include #include +#include "core/base/index_range.hpp" #include "core/components/fill_array_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/factorization/cholesky_kernels.hpp" #include "core/factorization/elimination_forest.hpp" +#include "core/factorization/factorization_kernels.hpp" #include "core/factorization/symbolic.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" @@ -189,6 +191,95 @@ TYPED_TEST(Lu, KernelFactorizeIsEquivalentToRef) } +TYPED_TEST(Lu, KernelValidateValidFactors) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = false; + + gko::kernels::GKO_DEVICE_NAMESPACE::factorization::symbolic_validate( + this->exec, this->dmtx.get(), this->dmtx_lu.get(), + gko::matrix::csr::build_lookup(this->dmtx_lu.get()), valid); + + ASSERT_TRUE(valid); + }); +} + + +TYPED_TEST(Lu, KernelValidateInvalidFactorsIdentity) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = true; + gko::matrix_data data( + this->dmtx_lu->get_size()); + // an identity matrix is a valid factorization, but doesn't contain the + // system matrix + for (auto row : gko::irange{static_cast(data.size[0])}) { + data.nonzeros.emplace_back(row, row, gko::one()); + } + this->dmtx_lu->read(data); + + gko::kernels::GKO_DEVICE_NAMESPACE::factorization::symbolic_validate( + this->exec, this->dmtx.get(), this->dmtx_lu.get(), + gko::matrix::csr::build_lookup(this->dmtx_lu.get()), valid); + + ASSERT_FALSE(valid); + }); +} + + +TYPED_TEST(Lu, KernelValidateInvalidFactorsMissing) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = true; + gko::matrix_data data; + this->dmtx_lu->write(data); + // delete a random entry somewhere in the middle of the matrix + data.nonzeros.erase(data.nonzeros.begin() + + data.nonzeros.size() * 3 / 4); + this->dmtx_lu->read(data); + + gko::kernels::GKO_DEVICE_NAMESPACE::factorization::symbolic_validate( + this->exec, this->dmtx.get(), this->dmtx_lu.get(), + gko::matrix::csr::build_lookup(this->dmtx_lu.get()), valid); + + ASSERT_FALSE(valid); + }); +} + + +TYPED_TEST(Lu, KernelValidateInvalidFactorsExtra) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + this->forall_matrices([this] { + bool valid = true; + gko::matrix_data data; + this->dmtx_lu->write(data); + // insert an entry between two non-adjacent values in a row somewhere + // not at the beginning + const auto it = std::adjacent_find( + data.nonzeros.begin() + data.nonzeros.size() / 5, + data.nonzeros.end(), [](auto a, auto b) { + return a.row == b.row && a.column < b.column - 1; + }); + data.nonzeros.insert(it, {it->row, it->column + 1, it->value}); + this->dmtx_lu->read(data); + + gko::kernels::GKO_DEVICE_NAMESPACE::factorization::symbolic_validate( + this->exec, this->dmtx.get(), this->dmtx_lu.get(), + gko::matrix::csr::build_lookup(this->dmtx_lu.get()), valid); + + ASSERT_FALSE(valid); + }); +} + + TYPED_TEST(Lu, SymbolicCholeskyWorks) { using value_type = typename TestFixture::value_type; From a7e5c51d2b3f227c4a202e3bfba8d298b1640673 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 12 Jan 2025 12:57:46 +0100 Subject: [PATCH 4/6] use validation kernel in sparse_blas benchmark --- benchmark/sparse_blas/operations.cpp | 50 +++++----------------------- 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index 30f3b5a80fe..b1ca5c394ae 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -11,6 +11,7 @@ #include "core/base/array_access.hpp" #include "core/factorization/elimination_forest.hpp" +#include "core/factorization/factorization_kernels.hpp" #include "core/factorization/symbolic.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" @@ -20,6 +21,7 @@ namespace { +GKO_REGISTER_OPERATION(symbolic_validate, factorization::symbolic_validate); GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); GKO_REGISTER_OPERATION(benchmark_lookup, csr::benchmark_lookup); @@ -541,47 +543,11 @@ class LookupOperation : public BenchmarkOperation { bool validate_symbolic_factorization(const Mtx* input, const Mtx* factors) { - const auto host_exec = input->get_executor()->get_master(); - const auto host_input = gko::make_temporary_clone(host_exec, input); - const auto host_factors = gko::make_temporary_clone(host_exec, factors); - const auto num_rows = input->get_size()[0]; - const auto in_row_ptrs = host_input->get_const_row_ptrs(); - const auto in_cols = host_input->get_const_col_idxs(); - const auto factor_row_ptrs = host_factors->get_const_row_ptrs(); - const auto factor_cols = host_factors->get_const_col_idxs(); - std::unordered_set columns; - for (itype row = 0; row < num_rows; row++) { - const auto in_begin = in_cols + in_row_ptrs[row]; - const auto in_end = in_cols + in_row_ptrs[row + 1]; - const auto factor_begin = factor_cols + factor_row_ptrs[row]; - const auto factor_end = factor_cols + factor_row_ptrs[row + 1]; - columns.clear(); - // the factor needs to contain the original matrix - // plus the diagonal if that was missing - columns.insert(in_begin, in_end); - columns.insert(row); - for (auto col_it = factor_begin; col_it < factor_end; ++col_it) { - const auto col = *col_it; - if (col >= row) { - break; - } - const auto dep_begin = factor_cols + factor_row_ptrs[col]; - const auto dep_end = factor_cols + factor_row_ptrs[col + 1]; - // insert the upper triangular part of the row - const auto dep_diag = std::find(dep_begin, dep_end, col); - columns.insert(dep_diag, dep_end); - } - // the factor should contain exactly these columns, no more - if (factor_end - factor_begin != columns.size()) { - return false; - } - for (auto col_it = factor_begin; col_it < factor_end; ++col_it) { - if (columns.find(*col_it) == columns.end()) { - return false; - } - } - } - return true; + const auto exec = factors->get_executor(); + bool valid = false; + exec->run(make_symbolic_validate( + input, factors, gko::matrix::csr::build_lookup(factors), valid)); + return valid; } From fb7e2e8010c20902e4328f7ed25fd1cd860325e8 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 12 Jan 2025 13:09:28 +0100 Subject: [PATCH 5/6] require sortedness --- benchmark/sparse_blas/operations.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index b1ca5c394ae..61eeaeeb627 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -543,6 +543,9 @@ class LookupOperation : public BenchmarkOperation { bool validate_symbolic_factorization(const Mtx* input, const Mtx* factors) { + if (!factors->is_sorted_by_column_index()) { + return false; + } const auto exec = factors->get_executor(); bool valid = false; exec->run(make_symbolic_validate( From 37f849886eb20bf38327ce680991a615efc3ae44 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 13 Jan 2025 13:14:31 +0100 Subject: [PATCH 6/6] fix OOB access with missing elements Co-authored-by: Yu-Hsiang M. Tsai --- common/cuda_hip/factorization/factorization_kernels.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/common/cuda_hip/factorization/factorization_kernels.cpp b/common/cuda_hip/factorization/factorization_kernels.cpp index 80a07c2a32d..034eaf0c14a 100644 --- a/common/cuda_hip/factorization/factorization_kernels.cpp +++ b/common/cuda_hip/factorization/factorization_kernels.cpp @@ -311,6 +311,7 @@ __global__ __launch_bounds__(default_block_size) void symbolic_validate( const auto idx = local_idx + factor_begin; if (local_idx == invalid_index()) { local_missing = true; + return; } found[idx] = true; };