Skip to content

Commit

Permalink
Merge kernel for factorization validation (#1766)
Browse files Browse the repository at this point in the history
Replaces the sequential validation function for the sparse_blas benchmark by device kernels,
which improves the performance of the validation significantly.
Additionally refactors the creation of csr_lookup into a separate function to avoid duplication. 

Related PR: #1766
  • Loading branch information
upsj authored Jan 14, 2025
2 parents b18607e + 37f8498 commit 75b134a
Show file tree
Hide file tree
Showing 19 changed files with 656 additions and 310 deletions.
53 changes: 11 additions & 42 deletions benchmark/sparse_blas/operations.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand All @@ -11,6 +11,7 @@

#include "core/base/array_access.hpp"
#include "core/factorization/elimination_forest.hpp"
#include "core/factorization/factorization_kernels.hpp"
#include "core/factorization/symbolic.hpp"
#include "core/matrix/csr_kernels.hpp"
#include "core/matrix/csr_lookup.hpp"
Expand All @@ -20,6 +21,7 @@
namespace {


GKO_REGISTER_OPERATION(symbolic_validate, factorization::symbolic_validate);
GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets);
GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup);
GKO_REGISTER_OPERATION(benchmark_lookup, csr::benchmark_lookup);
Expand Down Expand Up @@ -541,47 +543,14 @@ class LookupOperation : public BenchmarkOperation {

bool validate_symbolic_factorization(const Mtx* input, const Mtx* factors)
{
const auto host_exec = input->get_executor()->get_master();
const auto host_input = gko::make_temporary_clone(host_exec, input);
const auto host_factors = gko::make_temporary_clone(host_exec, factors);
const auto num_rows = input->get_size()[0];
const auto in_row_ptrs = host_input->get_const_row_ptrs();
const auto in_cols = host_input->get_const_col_idxs();
const auto factor_row_ptrs = host_factors->get_const_row_ptrs();
const auto factor_cols = host_factors->get_const_col_idxs();
std::unordered_set<itype> columns;
for (itype row = 0; row < num_rows; row++) {
const auto in_begin = in_cols + in_row_ptrs[row];
const auto in_end = in_cols + in_row_ptrs[row + 1];
const auto factor_begin = factor_cols + factor_row_ptrs[row];
const auto factor_end = factor_cols + factor_row_ptrs[row + 1];
columns.clear();
// the factor needs to contain the original matrix
// plus the diagonal if that was missing
columns.insert(in_begin, in_end);
columns.insert(row);
for (auto col_it = factor_begin; col_it < factor_end; ++col_it) {
const auto col = *col_it;
if (col >= row) {
break;
}
const auto dep_begin = factor_cols + factor_row_ptrs[col];
const auto dep_end = factor_cols + factor_row_ptrs[col + 1];
// insert the upper triangular part of the row
const auto dep_diag = std::find(dep_begin, dep_end, col);
columns.insert(dep_diag, dep_end);
}
// the factor should contain exactly these columns, no more
if (factor_end - factor_begin != columns.size()) {
return false;
}
for (auto col_it = factor_begin; col_it < factor_end; ++col_it) {
if (columns.find(*col_it) == columns.end()) {
return false;
}
}
}
return true;
if (!factors->is_sorted_by_column_index()) {
return false;
}
const auto exec = factors->get_executor();
bool valid = false;
exec->run(make_symbolic_validate(
input, factors, gko::matrix::csr::build_lookup(factors), valid));
return valid;
}


Expand Down
115 changes: 114 additions & 1 deletion common/cuda_hip/factorization/factorization_kernels.cpp
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#include "core/factorization/factorization_kernels.hpp"

#include <thrust/logical.h>

#include <ginkgo/core/base/array.hpp>

#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/thrust.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"
#include "common/cuda_hip/components/intrinsics.hpp"
#include "common/cuda_hip/components/searching.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
#include "common/cuda_hip/factorization/factorization_helpers.hpp"
#include "core/base/array_access.hpp"
#include "core/components/fill_array_kernels.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/csr_builder.hpp"

Expand Down Expand Up @@ -277,6 +281,74 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_l_row(
}


template <typename IndexType>
__global__ __launch_bounds__(default_block_size) void symbolic_validate(
const IndexType* __restrict__ mtx_row_ptrs,
const IndexType* __restrict__ mtx_cols,
const IndexType* __restrict__ factor_row_ptrs,
const IndexType* __restrict__ factor_cols, size_type size,
const IndexType* __restrict__ storage_offsets,
const int64* __restrict__ row_descs, const int32* __restrict__ storage,
bool* __restrict__ found, bool* __restrict__ missing)
{
const auto row = thread::get_subwarp_id_flat<config::warp_size>();
if (row >= size) {
return;
}
const auto warp =
group::tiled_partition<config::warp_size>(group::this_thread_block());
const auto lane = warp.thread_rank();
gko::matrix::csr::device_sparsity_lookup<IndexType> lookup{
factor_row_ptrs, factor_cols, storage_offsets,
storage, row_descs, static_cast<size_type>(row)};
const auto mtx_begin = mtx_row_ptrs[row];
const auto mtx_end = mtx_row_ptrs[row + 1];
const auto factor_begin = factor_row_ptrs[row];
const auto factor_end = factor_row_ptrs[row + 1];
bool local_missing = false;
const auto mark_found = [&](IndexType col) {
const auto local_idx = lookup[col];
const auto idx = local_idx + factor_begin;
if (local_idx == invalid_index<IndexType>()) {
local_missing = true;
return;
}
found[idx] = true;
};
// check the original matrix is part of the factors
for (auto nz = mtx_begin + lane; nz < mtx_end; nz += config::warp_size) {
mark_found(mtx_cols[nz]);
}
// check the diagonal is part of the factors
if (lane == 0) {
mark_found(row);
}
// check it is a valid factorization
for (auto nz = factor_begin; nz < factor_end; nz++) {
const auto dep = factor_cols[nz];
if (dep >= row) {
continue;
}
// for every lower triangular entry
const auto dep_begin = factor_row_ptrs[dep];
const auto dep_end = factor_row_ptrs[dep + 1];
for (auto dep_nz = dep_begin + lane; dep_nz < dep_end;
dep_nz += config::warp_size) {
const auto col = factor_cols[dep_nz];
// check every upper triangular entry thereof is part of the
// factorization
if (col > dep) {
mark_found(col);
}
}
}
local_missing = warp.any(local_missing);
if (lane == 0) {
missing[row] = local_missing;
}
}


} // namespace kernel


Expand Down Expand Up @@ -487,6 +559,47 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL);


template <typename ValueType, typename IndexType>
void symbolic_validate(
std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType>* system_matrix,
const matrix::Csr<ValueType, IndexType>* factors,
const matrix::csr::lookup_data<IndexType>& factors_lookup, bool& valid)
{
const auto size = system_matrix->get_size()[0];
const auto row_ptrs = system_matrix->get_const_row_ptrs();
const auto col_idxs = system_matrix->get_const_col_idxs();
const auto factor_row_ptrs = factors->get_const_row_ptrs();
const auto factor_col_idxs = factors->get_const_col_idxs();
// this stores for each factor nonzero whether it occurred as part of the
// factorization.
array<bool> found(exec, factors->get_num_stored_elements());
components::fill_array(exec, found.get_data(), found.get_size(), false);
// this stores for each row whether there were any elements missing
array<bool> missing(exec, size);
components::fill_array(exec, missing.get_data(), missing.get_size(), false);
if (size > 0) {
const auto num_blocks =
ceildiv(size, default_block_size / config::warp_size);
kernel::symbolic_validate<<<num_blocks, default_block_size>>>(
row_ptrs, col_idxs, factor_row_ptrs, factor_col_idxs, size,
factors_lookup.storage_offsets.get_const_data(),
factors_lookup.row_descs.get_const_data(),
factors_lookup.storage.get_const_data(), found.get_data(),
missing.get_data());
}
valid = thrust::all_of(thrust_policy(exec), found.get_const_data(),
found.get_const_data() + found.get_size(),
thrust::identity<bool>{}) &&
!thrust::any_of(thrust_policy(exec), missing.get_const_data(),
missing.get_const_data() + missing.get_size(),
thrust::identity<bool>{});
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL);


} // namespace factorization
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
Expand Down
1 change: 1 addition & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ target_sources(${ginkgo_core}
matrix/batch_identity.cpp
matrix/coo.cpp
matrix/csr.cpp
matrix/csr_lookup.cpp
matrix/dense.cpp
matrix/diagonal.cpp
matrix/ell.cpp
Expand Down
4 changes: 3 additions & 1 deletion core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -948,6 +948,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL);


} // namespace factorization
Expand Down
35 changes: 10 additions & 25 deletions core/factorization/cholesky.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -27,8 +27,6 @@ namespace {


GKO_REGISTER_OPERATION(fill_array, components::fill_array);
GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets);
GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup);
GKO_REGISTER_OPERATION(forest_from_factor, cholesky::forest_from_factor);
GKO_REGISTER_OPERATION(initialize, cholesky::initialize);
GKO_REGISTER_OPERATION(factorize, cholesky::factorize);
Expand Down Expand Up @@ -107,37 +105,24 @@ std::unique_ptr<LinOp> Cholesky<ValueType, IndexType>::generate_impl(
exec->run(make_forest_from_factor(factors.get(), *forest));
}
// setup lookup structure on factors
array<IndexType> storage_offsets{exec, num_rows + 1};
array<int64> row_descs{exec, num_rows};
const auto lookup = matrix::csr::build_lookup(factors.get());
array<IndexType> diag_idxs{exec, num_rows};
array<IndexType> transpose_idxs{exec, factors->get_num_stored_elements()};
const auto allowed_sparsity = gko::matrix::csr::sparsity_type::bitmap |
gko::matrix::csr::sparsity_type::full |
gko::matrix::csr::sparsity_type::hash;
exec->run(make_build_lookup_offsets(
factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows,
allowed_sparsity, storage_offsets.get_data()));
const auto storage_size =
static_cast<size_type>(get_element(storage_offsets, num_rows));
array<int32> storage{exec, storage_size};
exec->run(make_build_lookup(
factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows,
allowed_sparsity, storage_offsets.get_const_data(),
row_descs.get_data(), storage.get_data()));
// initialize factors
exec->run(make_fill_array(factors->get_values(),
factors->get_num_stored_elements(),
zero<ValueType>()));
exec->run(make_initialize(mtx.get(), storage_offsets.get_const_data(),
row_descs.get_const_data(),
storage.get_const_data(), diag_idxs.get_data(),
transpose_idxs.get_data(), factors.get()));
exec->run(make_initialize(
mtx.get(), lookup.storage_offsets.get_const_data(),
lookup.row_descs.get_const_data(), lookup.storage.get_const_data(),
diag_idxs.get_data(), transpose_idxs.get_data(), factors.get()));
// run numerical factorization
array<int> tmp{exec};
exec->run(make_factorize(
storage_offsets.get_const_data(), row_descs.get_const_data(),
storage.get_const_data(), diag_idxs.get_const_data(),
transpose_idxs.get_const_data(), *forest, factors.get(), true, tmp));
lookup.storage_offsets.get_const_data(),
lookup.row_descs.get_const_data(), lookup.storage.get_const_data(),
diag_idxs.get_const_data(), transpose_idxs.get_const_data(), *forest,
factors.get(), true, tmp));
return factorization_type::create_from_combined_cholesky(
std::move(factors));
}
Expand Down
16 changes: 14 additions & 2 deletions core/factorization/factorization_kernels.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand All @@ -13,6 +13,7 @@
#include <ginkgo/core/matrix/csr.hpp>

#include "core/base/kernel_declaration.hpp"
#include "core/matrix/csr_lookup.hpp"


namespace gko {
Expand Down Expand Up @@ -52,6 +53,15 @@ namespace kernels {
matrix::Csr<ValueType, IndexType>* l_factor, \
bool diag_sqrt)

#define GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL(ValueType, \
IndexType) \
void symbolic_validate( \
std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType>* system_matrix, \
const matrix::Csr<ValueType, IndexType>* factors, \
const matrix::csr::lookup_data<IndexType>& factors_lookup, \
bool& valid)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename ValueType, typename IndexType> \
Expand All @@ -66,7 +76,9 @@ namespace kernels {
GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL(ValueType, \
IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL(ValueType, IndexType)
GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_FACTORIZATION_SYMBOLIC_VALIDATE_KERNEL(ValueType, IndexType)


GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(factorization,
Expand Down
Loading

0 comments on commit 75b134a

Please sign in to comment.