From 2e47fa74f92a63e340f067105640658ce989114d Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Mon, 30 Sep 2024 11:45:17 +0200 Subject: [PATCH] update the documentation, change checked_lookup -> has_full_fillin (opposite behavior). Co-authored-by: Tobias Ribizel Co-authored-by: Natalie Beams <246972+nbeams@users.noreply.github.com> --- common/cuda_hip/factorization/ilu_kernels.cpp | 6 +++--- common/cuda_hip/factorization/lu_kernels.cpp | 12 ++++++------ core/device_hooks/common_kernels.inc.cpp | 2 +- core/factorization/ilu.cpp | 4 ++-- core/factorization/ilu_kernels.hpp | 8 ++++---- core/factorization/lu.cpp | 6 +++--- core/factorization/lu_kernels.hpp | 2 +- core/test/config/factorization.cpp | 6 +++--- dpcpp/factorization/ilu_kernels.dp.cpp | 6 +++--- dpcpp/factorization/lu_kernels.dp.cpp | 2 +- include/ginkgo/core/factorization/ilu.hpp | 6 +++--- include/ginkgo/core/factorization/lu.hpp | 13 +++++++------ omp/factorization/ilu_kernels.cpp | 6 +++--- omp/factorization/lu_kernels.cpp | 4 ++-- reference/factorization/ilu_kernels.cpp | 6 +++--- reference/factorization/lu_kernels.cpp | 4 ++-- reference/test/factorization/lu_kernels.cpp | 4 ++-- test/factorization/lu_kernels.cpp | 8 ++++---- 18 files changed, 53 insertions(+), 52 deletions(-) diff --git a/common/cuda_hip/factorization/ilu_kernels.cpp b/common/cuda_hip/factorization/ilu_kernels.cpp index 0469b80fe86..b81f8fb9092 100644 --- a/common/cuda_hip/factorization/ilu_kernels.cpp +++ b/common/cuda_hip/factorization/ilu_kernels.cpp @@ -17,8 +17,8 @@ namespace ilu_factorization { template -void compute_lu(std::shared_ptr exec, - matrix::Csr* m) +void sparselib_ilu(std::shared_ptr exec, + matrix::Csr* m) { const auto id = exec->get_device_id(); auto handle = exec->get_sparselib_handle(); @@ -55,7 +55,7 @@ void compute_lu(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); + GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL); } // namespace ilu_factorization diff --git a/common/cuda_hip/factorization/lu_kernels.cpp b/common/cuda_hip/factorization/lu_kernels.cpp index 6cb9b02129b..8c69f1f071d 100644 --- a/common/cuda_hip/factorization/lu_kernels.cpp +++ b/common/cuda_hip/factorization/lu_kernels.cpp @@ -85,7 +85,7 @@ __global__ __launch_bounds__(default_block_size) void initialize( } -template +template __global__ __launch_bounds__(default_block_size) void factorize( const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ cols, const IndexType* __restrict__ storage_offsets, @@ -130,7 +130,7 @@ __global__ __launch_bounds__(default_block_size) void factorize( upper_nz += config::warp_size) { const auto upper_col = cols[upper_nz]; const auto upper_val = vals[upper_nz]; - if (checked_lookup) { + if (!has_all_fillin) { const auto pos = lookup[upper_col]; if (pos != invalid_index()) { vals[row_begin + pos] -= scale * upper_val; @@ -260,7 +260,7 @@ template void factorize(std::shared_ptr exec, const IndexType* lookup_offsets, const int64* lookup_descs, const int32* lookup_storage, const IndexType* diag_idxs, - matrix::Csr* factors, bool checked_lookup, + matrix::Csr* factors, bool has_all_fillin, array& tmp_storage) { const auto num_rows = factors->get_size()[0]; @@ -268,15 +268,15 @@ void factorize(std::shared_ptr exec, syncfree_storage storage(exec, tmp_storage, num_rows); const auto num_blocks = ceildiv(num_rows, default_block_size / config::warp_size); - if (checked_lookup) { - kernel::factorize + if (!has_all_fillin) { + kernel::factorize <<get_stream()>>>( factors->get_const_row_ptrs(), factors->get_const_col_idxs(), lookup_offsets, lookup_storage, lookup_descs, diag_idxs, as_device_type(factors->get_values()), storage, num_rows); } else { - kernel::factorize + kernel::factorize <<get_stream()>>>( factors->get_const_row_ptrs(), factors->get_const_col_idxs(), lookup_offsets, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 1ba925e94e3..459a9bf3741 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -873,7 +873,7 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); namespace ilu_factorization { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL); } // namespace ilu_factorization diff --git a/core/factorization/ilu.cpp b/core/factorization/ilu.cpp index 015a5829493..dccfabcb51a 100644 --- a/core/factorization/ilu.cpp +++ b/core/factorization/ilu.cpp @@ -26,7 +26,7 @@ namespace ilu_factorization { namespace { -GKO_REGISTER_OPERATION(compute_ilu, ilu_factorization::compute_lu); +GKO_REGISTER_OPERATION(compute_ilu, ilu_factorization::sparselib_ilu); GKO_REGISTER_OPERATION(add_diagonal_elements, factorization::add_diagonal_elements); GKO_REGISTER_OPERATION(initialize_row_ptrs_l_u, @@ -106,7 +106,7 @@ std::unique_ptr> Ilu::generate_l_u( local_system_matrix->get_const_row_ptrs()))); ilu = gko::experimental::factorization::Lu::build() - .with_checked_lookup(true) + .with_has_all_fillin(false) .with_symbolic_factorization(sparsity) .on(exec) ->generate(local_system_matrix) diff --git a/core/factorization/ilu_kernels.hpp b/core/factorization/ilu_kernels.hpp index 2371c17fda4..ef90764b141 100644 --- a/core/factorization/ilu_kernels.hpp +++ b/core/factorization/ilu_kernels.hpp @@ -20,14 +20,14 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_ILU_COMPUTE_LU_KERNEL(ValueType, IndexType) \ - void compute_lu(std::shared_ptr exec, \ - matrix::Csr* system_matrix) +#define GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL(ValueType, IndexType) \ + void sparselib_ilu(std::shared_ptr exec, \ + matrix::Csr* system_matrix) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ - GKO_DECLARE_ILU_COMPUTE_LU_KERNEL(ValueType, IndexType) + GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL(ValueType, IndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(ilu_factorization, diff --git a/core/factorization/lu.cpp b/core/factorization/lu.cpp index 3f2790e674a..31d69b2a993 100644 --- a/core/factorization/lu.cpp +++ b/core/factorization/lu.cpp @@ -69,8 +69,8 @@ Lu::parse(const config::pnode& config, if (auto& obj = config.get("skip_sorting")) { params.with_skip_sorting(config::get_value(obj)); } - if (auto& obj = config.get("checked_lookup")) { - params.with_checked_lookup(config::get_value(obj)); + if (auto& obj = config.get("has_all_fillin")) { + params.with_has_all_fillin(config::get_value(obj)); } return params; @@ -166,7 +166,7 @@ std::unique_ptr Lu::generate_impl( exec->run(make_factorize( storage_offsets.get_const_data(), row_descs.get_const_data(), storage.get_const_data(), diag_idxs.get_const_data(), factors.get(), - parameters_.checked_lookup, tmp)); + parameters_.has_all_fillin, tmp)); return factorization_type::create_from_combined_lu(std::move(factors)); } diff --git a/core/factorization/lu_kernels.hpp b/core/factorization/lu_kernels.hpp index 581c50bcf6e..b2d398985fd 100644 --- a/core/factorization/lu_kernels.hpp +++ b/core/factorization/lu_kernels.hpp @@ -33,7 +33,7 @@ namespace kernels { const IndexType* lookup_offsets, const int64* lookup_descs, \ const int32* lookup_storage, const IndexType* diag_idxs, \ matrix::Csr* factors, \ - bool checked_lookup, array& tmp_storage) + bool has_all_fillin, array& tmp_storage) #define GKO_DECLARE_LU_SYMMETRIC_FACTORIZE_SIMPLE(IndexType) \ diff --git a/core/test/config/factorization.cpp b/core/test/config/factorization.cpp index 014fb5e346d..8fc8d138f28 100644 --- a/core/test/config/factorization.cpp +++ b/core/test/config/factorization.cpp @@ -181,8 +181,8 @@ struct Lu : FactorizationConfigTest< gko::experimental::factorization::symbolic_type::near_symmetric); config_map["skip_sorting"] = pnode{true}; param.with_skip_sorting(true); - config_map["checked_lookup"] = pnode{true}; - param.with_checked_lookup(true); + config_map["has_all_fillin"] = pnode{false}; + param.with_has_all_fillin(false); } template @@ -195,7 +195,7 @@ struct Lu : FactorizationConfigTest< ans_param.symbolic_factorization); ASSERT_EQ(res_param.symbolic_algorithm, ans_param.symbolic_algorithm); ASSERT_EQ(res_param.skip_sorting, ans_param.skip_sorting); - ASSERT_EQ(res_param.checked_lookup, ans_param.checked_lookup); + ASSERT_EQ(res_param.has_all_fillin, ans_param.has_all_fillin); } }; diff --git a/dpcpp/factorization/ilu_kernels.dp.cpp b/dpcpp/factorization/ilu_kernels.dp.cpp index 78fd24c08f4..847547f7706 100644 --- a/dpcpp/factorization/ilu_kernels.dp.cpp +++ b/dpcpp/factorization/ilu_kernels.dp.cpp @@ -17,11 +17,11 @@ namespace ilu_factorization { template -void compute_lu(std::shared_ptr exec, - matrix::Csr* m) GKO_NOT_IMPLEMENTED; +void sparselib_ilu(std::shared_ptr exec, + matrix::Csr* m) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); + GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL); } // namespace ilu_factorization diff --git a/dpcpp/factorization/lu_kernels.dp.cpp b/dpcpp/factorization/lu_kernels.dp.cpp index d6a1c2ed5b2..9fdf1165043 100644 --- a/dpcpp/factorization/lu_kernels.dp.cpp +++ b/dpcpp/factorization/lu_kernels.dp.cpp @@ -39,7 +39,7 @@ template void factorize(std::shared_ptr exec, const IndexType* lookup_offsets, const int64* lookup_descs, const int32* lookup_storage, const IndexType* diag_idxs, - matrix::Csr* factors, bool checked_lookup, + matrix::Csr* factors, bool has_all_fillin, array& tmp_storage) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_FACTORIZE); diff --git a/include/ginkgo/core/factorization/ilu.hpp b/include/ginkgo/core/factorization/ilu.hpp index 2de5ea8bca8..d527d8c9912 100644 --- a/include/ginkgo/core/factorization/ilu.hpp +++ b/include/ginkgo/core/factorization/ilu.hpp @@ -27,7 +27,7 @@ namespace factorization { /** * A helper for algorithm selection in the incomplete factorization. - * sparselib is only available for cuda and hip. + * sparselib is only available for CUDA and HIP. * syncfree is Ginkgo's implementation through the Lu factorization with given * sparsity. */ @@ -105,9 +105,9 @@ class Ilu : public Composition { /** * Select the implementation which is supposed to be used for - * the incomplete factorization. This only matters for the Cuda and Hip + * the incomplete factorization. This only matters for the CUDA and HIP * executor where the choice is between the Ginkgo (syncfree) and the - * cuSPARSE/hipSPARSE (sparselib) implementation. Default is sparselib. + * cuSPARSE/HIPSPARSE (sparselib) implementation. Default is sparselib. */ factorize_algorithm GKO_FACTORY_PARAMETER_SCALAR( algorithm, factorize_algorithm::sparselib); diff --git a/include/ginkgo/core/factorization/lu.hpp b/include/ginkgo/core/factorization/lu.hpp index 9fba621548e..e803d8d1b03 100644 --- a/include/ginkgo/core/factorization/lu.hpp +++ b/include/ginkgo/core/factorization/lu.hpp @@ -99,13 +99,14 @@ class Lu bool GKO_FACTORY_PARAMETER_SCALAR(skip_sorting, false); /** - * The symbolic factoization should contains the fill-in information. If - * it is not the case (like Ilu), users might face hang or illegal - * access issue. Please enable this option when the symbolic - * factorization does not contain the full fill-in information. Symbolic - * factorization must still contain the entry for the original matrix. + * The symbolic factorization contains the fill-in for the matrix. If it + * does not have full fill-in, as in Ilu, this parameter must be set to + * false in order to avoid the possibility of hanging or illegal memory + * accesses during the factorization process. When this is true, the + * symbolic factorization must still contain the non-zero locations in + * the original matrix, at minimum. */ - bool GKO_FACTORY_PARAMETER_SCALAR(checked_lookup, false); + bool GKO_FACTORY_PARAMETER_SCALAR(has_all_fillin, true); }; /** diff --git a/omp/factorization/ilu_kernels.cpp b/omp/factorization/ilu_kernels.cpp index 70982c80753..b88e6a77900 100644 --- a/omp/factorization/ilu_kernels.cpp +++ b/omp/factorization/ilu_kernels.cpp @@ -17,11 +17,11 @@ namespace ilu_factorization { template -void compute_lu(std::shared_ptr exec, - matrix::Csr* m) GKO_NOT_IMPLEMENTED; +void sparselib_ilu(std::shared_ptr exec, + matrix::Csr* m) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); + GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL); } // namespace ilu_factorization diff --git a/omp/factorization/lu_kernels.cpp b/omp/factorization/lu_kernels.cpp index 95ff7e3087f..c18bda37cec 100644 --- a/omp/factorization/lu_kernels.cpp +++ b/omp/factorization/lu_kernels.cpp @@ -66,7 +66,7 @@ template void factorize(std::shared_ptr exec, const IndexType* lookup_offsets, const int64* lookup_descs, const int32* lookup_storage, const IndexType* diag_idxs, - matrix::Csr* factors, bool checked_lookup, + matrix::Csr* factors, bool has_all_fillin, array& tmp_storage) { const auto num_rows = factors->get_size()[0]; @@ -89,7 +89,7 @@ void factorize(std::shared_ptr exec, for (auto dep_nz = dep_diag_idx + 1; dep_nz < dep_end; dep_nz++) { const auto col = cols[dep_nz]; const auto val = vals[dep_nz]; - if (checked_lookup) { + if (!has_all_fillin) { const auto idx = lookup[col]; if (idx != invalid_index()) { vals[row_begin + idx] -= scale * val; diff --git a/reference/factorization/ilu_kernels.cpp b/reference/factorization/ilu_kernels.cpp index fdbe8a9e86f..3323e0b6cef 100644 --- a/reference/factorization/ilu_kernels.cpp +++ b/reference/factorization/ilu_kernels.cpp @@ -23,8 +23,8 @@ namespace ilu_factorization { template -void compute_lu(std::shared_ptr exec, - matrix::Csr* m) +void sparselib_ilu(std::shared_ptr exec, + matrix::Csr* m) { vector diagonals{m->get_size()[0], -1, exec}; const auto row_ptrs = m->get_const_row_ptrs(); @@ -66,7 +66,7 @@ void compute_lu(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); + GKO_DECLARE_ILU_SPARSELIB_ILU_KERNEL); } // namespace ilu_factorization diff --git a/reference/factorization/lu_kernels.cpp b/reference/factorization/lu_kernels.cpp index b6cafd590a5..5bf159a6de5 100644 --- a/reference/factorization/lu_kernels.cpp +++ b/reference/factorization/lu_kernels.cpp @@ -65,7 +65,7 @@ template void factorize(std::shared_ptr exec, const IndexType* lookup_offsets, const int64* lookup_descs, const int32* lookup_storage, const IndexType* diag_idxs, - matrix::Csr* factors, bool checked_lookup, + matrix::Csr* factors, bool has_all_fillin, array& tmp_storage) { const auto num_rows = factors->get_size()[0]; @@ -87,7 +87,7 @@ void factorize(std::shared_ptr exec, for (auto dep_nz = dep_diag_idx + 1; dep_nz < dep_end; dep_nz++) { const auto col = cols[dep_nz]; const auto val = vals[dep_nz]; - if (checked_lookup) { + if (!has_all_fillin) { const auto idx = lookup[col]; if (idx != invalid_index()) { vals[row_begin + idx] -= scale * val; diff --git a/reference/test/factorization/lu_kernels.cpp b/reference/test/factorization/lu_kernels.cpp index 5233a76f117..cfc86eba6d3 100644 --- a/reference/test/factorization/lu_kernels.cpp +++ b/reference/test/factorization/lu_kernels.cpp @@ -356,7 +356,7 @@ TYPED_TEST(Lu, GenerateIluWithBitmapIsEquivalentToRef) auto factory = gko::experimental::factorization::Lu::build() .with_symbolic_factorization(sparsity) - .with_checked_lookup(true) + .with_has_all_fillin(false) .on(this->ref); auto lu = factory->generate(mtx); @@ -390,7 +390,7 @@ TYPED_TEST(Lu, GenerateIluWithHashmapIsEquivalentToRef) auto factory = gko::experimental::factorization::Lu::build() .with_symbolic_factorization(sparsity) - .with_checked_lookup(true) + .with_has_all_fillin(false) .on(this->ref); auto lu = factory->generate(mtx); diff --git a/test/factorization/lu_kernels.cpp b/test/factorization/lu_kernels.cpp index b6965684cf5..d32942c393e 100644 --- a/test/factorization/lu_kernels.cpp +++ b/test/factorization/lu_kernels.cpp @@ -376,12 +376,12 @@ TYPED_TEST(Lu, GenerateIluWithBitmapIsEquivalentToRef) auto factory = gko::experimental::factorization::Lu::build() .with_symbolic_factorization(sparsity) - .with_checked_lookup(true) + .with_has_all_fillin(false) .on(this->ref); auto dfactory = gko::experimental::factorization::Lu::build() .with_symbolic_factorization(dsparsity) - .with_checked_lookup(true) + .with_has_all_fillin(false) .on(this->exec); auto lu = factory->generate(mtx); @@ -419,12 +419,12 @@ TYPED_TEST(Lu, GenerateIluWithHashmapIsEquivalentToRef) auto factory = gko::experimental::factorization::Lu::build() .with_symbolic_factorization(sparsity) - .with_checked_lookup(true) + .with_has_all_fillin(false) .on(this->ref); auto dfactory = gko::experimental::factorization::Lu::build() .with_symbolic_factorization(dsparsity) - .with_checked_lookup(true) + .with_has_all_fillin(false) .on(this->exec); auto lu = factory->generate(mtx);