-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge Split batched solver compilation
This PR splits up the compilation of the batched solvers in order to reduce the compilation times. It splits up the instantiations of the kernel launches depending on the number of vectors in shared memory. This is based on the same CMake mechanism as for the csr and fbcsr kernels. Related PR: #1629
- Loading branch information
Showing
26 changed files
with
1,582 additions
and
786 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors | ||
// | ||
// SPDX-License-Identifier: BSD-3-Clause | ||
|
||
#pragma once | ||
|
||
#include "common/cuda_hip/base/batch_struct.hpp" | ||
#include "common/cuda_hip/base/config.hpp" | ||
#include "common/cuda_hip/base/types.hpp" | ||
#include "common/cuda_hip/matrix/batch_struct.hpp" | ||
#include "core/base/batch_struct.hpp" | ||
#include "core/matrix/batch_struct.hpp" | ||
#include "core/solver/batch_bicgstab_kernels.hpp" | ||
|
||
|
||
namespace gko { | ||
namespace kernels { | ||
namespace GKO_DEVICE_NAMESPACE { | ||
namespace batch_bicgstab { | ||
|
||
|
||
// Local shorthand: lets code in this namespace write settings<T> for | ||
// gko::kernels::batch_bicgstab::settings<T>. | ||
template <typename T> | ||
using settings = gko::kernels::batch_bicgstab::settings<T>; | ||
|
||
|
||
// Host-side launcher for the batched BiCGSTAB apply kernel (declaration only). | ||
// | ||
// Template parameters: | ||
//   ValueType        scalar type; the value pointers use device_type<ValueType>. | ||
//   n_shared         compile-time integer; presumably the number of vectors | ||
//                    kept in shared memory - TODO confirm against the kernel. | ||
//   prec_shared      compile-time flag; presumably whether the preconditioner | ||
//                    storage also lives in shared memory - TODO confirm. | ||
//   StopType         presumably the stopping-criterion type - TODO confirm. | ||
//   PrecType, LogType, BatchMatrixType | ||
//                    types of the prec, logger and mat arguments. | ||
// | ||
// Parameters: | ||
//   exec             executor handle. | ||
//   sconf            BiCGSTAB storage configuration. | ||
//   settings         solver settings, templated on remove_complex<ValueType>. | ||
//   logger, prec     logger and preconditioner objects, taken by reference. | ||
//   mat              batch matrix, taken by const reference. | ||
//   b_values         read-only value array (const pointee). | ||
//   x_values         writable value array. | ||
//   workspace_data   writable scratch array. | ||
//   block_size, shared_size | ||
//                    launch configuration values chosen by the caller. | ||
template <typename ValueType, int n_shared, bool prec_shared, typename StopType, | ||
typename PrecType, typename LogType, typename BatchMatrixType> | ||
void launch_apply_kernel( | ||
std::shared_ptr<const DefaultExecutor> exec, | ||
const gko::kernels::batch_bicgstab::storage_config& sconf, | ||
const settings<remove_complex<ValueType>>& settings, LogType& logger, | ||
PrecType& prec, const BatchMatrixType& mat, | ||
const device_type<ValueType>* const __restrict__ b_values, | ||
device_type<ValueType>* const __restrict__ x_values, | ||
device_type<ValueType>* const __restrict__ workspace_data, | ||
const int& block_size, const size_t& shared_size); | ||
|
||
// Spells out the signature of one launch_apply_kernel instantiation for value | ||
// type _vtype with the given _n_shared / _prec_shared values and the matrix, | ||
// logger, preconditioner and stop templates mat_t, log_t, pre_t and stop_t. | ||
// Only the first four template arguments are written explicitly; the | ||
// remaining ones follow from the parameter types. | ||
// NOTE(review): the expansion carries no leading `template` keyword; | ||
// presumably the GKO_INSTANTIATE_* helpers prepend it - confirm. | ||
#define GKO_DECLARE_BATCH_BICGSTAB_LAUNCH(_vtype, _n_shared, _prec_shared, \ | ||
mat_t, log_t, pre_t, stop_t) \ | ||
void launch_apply_kernel<device_type<_vtype>, _n_shared, _prec_shared, \ | ||
stop_t<device_type<_vtype>>>( \ | ||
std::shared_ptr<const DefaultExecutor> exec, \ | ||
const gko::kernels::batch_bicgstab::storage_config& sconf, \ | ||
const settings<remove_complex<device_type<_vtype>>>& settings, \ | ||
log_t<gko::remove_complex<device_type<_vtype>>>& logger, \ | ||
pre_t<device_type<_vtype>>& prec, \ | ||
const mat_t<const device_type<_vtype>>& mat, \ | ||
const device_type<_vtype>* const __restrict__ b_values, \ | ||
device_type<_vtype>* const __restrict__ x_values, \ | ||
device_type<_vtype>* const __restrict__ workspace_data, \ | ||
const int& block_size, const size_t& shared_size) | ||
|
||
// Forwards GKO_DECLARE_BATCH_BICGSTAB_LAUNCH plus the caller's arguments to | ||
// GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS (defined elsewhere; presumably it | ||
// repeats the declaration once per supported value type - confirm). | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH(...) \ | ||
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS( \ | ||
GKO_DECLARE_BATCH_BICGSTAB_LAUNCH, __VA_ARGS__) | ||
|
||
// One instantiation macro per (n_shared, prec_shared) combination: n_shared | ||
// 0 through 9 with prec_shared == false, plus n_shared 9 with prec_shared == | ||
// true. Each one hands GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH and the two | ||
// values to GKO_BATCH_INSTANTIATE_VARGS (defined elsewhere; presumably it | ||
// supplies the matrix/logger/preconditioner/stop combinations - confirm). | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_0_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 0, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_1_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 1, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_2_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 2, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_3_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 3, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_4_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 4, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_5_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 5, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_6_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 6, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_7_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 7, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_8_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 8, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_9_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 9, false) | ||
#define GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_9_TRUE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH, 9, true) | ||
|
||
|
||
} // namespace batch_bicgstab | ||
} // namespace GKO_DEVICE_NAMESPACE | ||
} // namespace kernels | ||
} // namespace gko |
69 changes: 69 additions & 0 deletions
69
common/cuda_hip/solver/batch_bicgstab_launch.instantiate.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors | ||
// | ||
// SPDX-License-Identifier: BSD-3-Clause | ||
|
||
#include "common/cuda_hip/solver/batch_bicgstab_launch.hpp" | ||
|
||
#include <ginkgo/core/base/exception_helpers.hpp> | ||
|
||
#include "common/cuda_hip/solver/batch_bicgstab_kernels.hpp" | ||
#include "core/matrix/batch_struct.hpp" | ||
#include "core/solver/batch_bicgstab_kernels.hpp" | ||
#include "core/solver/batch_dispatch.hpp" | ||
|
||
|
||
namespace gko { | ||
namespace kernels { | ||
namespace GKO_DEVICE_NAMESPACE { | ||
namespace batch_bicgstab { | ||
|
||
|
||
// Launches batch_single_kernels::apply_kernel<StopType, n_shared, prec_shared> | ||
// for the batched BiCGSTAB solver. | ||
// | ||
// The function only enqueues the kernel on the stream returned by | ||
// exec->get_stream(); it performs no synchronization or error check itself. | ||
// From settings, only max_iterations and residual_tol are passed on (the | ||
// tolerance after conversion with as_device_type); sconf, logger, prec, mat | ||
// and the three value pointers are forwarded unchanged. | ||
template <typename ValueType, int n_shared, bool prec_shared, typename StopType, | ||
typename PrecType, typename LogType, typename BatchMatrixType> | ||
void launch_apply_kernel( | ||
std::shared_ptr<const DefaultExecutor> exec, | ||
const gko::kernels::batch_bicgstab::storage_config& sconf, | ||
const settings<remove_complex<ValueType>>& settings, LogType& logger, | ||
PrecType& prec, const BatchMatrixType& mat, | ||
const device_type<ValueType>* const __restrict__ b_values, | ||
device_type<ValueType>* const __restrict__ x_values, | ||
device_type<ValueType>* const __restrict__ workspace_data, | ||
const int& block_size, const size_t& shared_size) | ||
{ | ||
// Launch configuration: mat.num_batch_items blocks of block_size threads, | ||
// shared_size bytes of dynamic shared memory per block. | ||
batch_single_kernels::apply_kernel<StopType, n_shared, prec_shared> | ||
<<<mat.num_batch_items, block_size, shared_size, exec->get_stream()>>>( | ||
sconf, settings.max_iterations, | ||
as_device_type(settings.residual_tol), logger, prec, mat, b_values, | ||
x_values, workspace_data); | ||
} | ||
|
||
|
||
// Explicit instantiations of launch_apply_kernel, one macro per | ||
// (n_shared, prec_shared) combination declared in the launch header. | ||
// NOTE(review): the marker comments around the macros (begin, split, end) | ||
// look like input for the CMake file-splitting step this change relies on; | ||
// keep them exactly as written - confirm against the build scripts. | ||
// begin | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_0_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_1_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_2_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_3_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_4_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_5_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_6_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_7_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_8_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_9_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_BICGSTAB_LAUNCH_9_TRUE; | ||
// end | ||
|
||
|
||
} // namespace batch_bicgstab | ||
} // namespace GKO_DEVICE_NAMESPACE | ||
} // namespace kernels | ||
} // namespace gko |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors | ||
// | ||
// SPDX-License-Identifier: BSD-3-Clause | ||
|
||
#pragma once | ||
|
||
#include "common/cuda_hip/base/batch_struct.hpp" | ||
#include "common/cuda_hip/base/config.hpp" | ||
#include "common/cuda_hip/base/types.hpp" | ||
#include "common/cuda_hip/matrix/batch_struct.hpp" | ||
#include "core/base/batch_struct.hpp" | ||
#include "core/matrix/batch_struct.hpp" | ||
#include "core/solver/batch_cg_kernels.hpp" | ||
|
||
|
||
namespace gko { | ||
namespace kernels { | ||
namespace GKO_DEVICE_NAMESPACE { | ||
namespace batch_cg { | ||
|
||
|
||
// Local shorthand: lets code in this namespace write settings<T> for | ||
// gko::kernels::batch_cg::settings<T>. | ||
template <typename T> | ||
using settings = gko::kernels::batch_cg::settings<T>; | ||
|
||
|
||
// Host-side launcher for the batched CG apply kernel (declaration only). | ||
// | ||
// Template parameters: | ||
//   ValueType        scalar type; the value pointers use device_type<ValueType>. | ||
//   n_shared         compile-time integer; presumably the number of vectors | ||
//                    kept in shared memory - TODO confirm against the kernel. | ||
//   prec_shared      compile-time flag; presumably whether the preconditioner | ||
//                    storage also lives in shared memory - TODO confirm. | ||
//   StopType         presumably the stopping-criterion type - TODO confirm. | ||
//   PrecType, LogType, BatchMatrixType | ||
//                    types of the prec, logger and mat arguments. | ||
// | ||
// Parameters: | ||
//   exec             executor handle. | ||
//   sconf            CG storage configuration. | ||
//   settings         solver settings, templated on remove_complex<ValueType>. | ||
//   logger, prec     logger and preconditioner objects, taken by reference. | ||
//   mat              batch matrix, taken by const reference. | ||
//   b_values         read-only value array (const pointee). | ||
//   x_values         writable value array. | ||
//   workspace_data   writable scratch array. | ||
//   block_size, shared_size | ||
//                    launch configuration values chosen by the caller. | ||
template <typename ValueType, int n_shared, bool prec_shared, typename StopType, | ||
typename PrecType, typename LogType, typename BatchMatrixType> | ||
void launch_apply_kernel( | ||
std::shared_ptr<const DefaultExecutor> exec, | ||
const gko::kernels::batch_cg::storage_config& sconf, | ||
const settings<remove_complex<ValueType>>& settings, LogType& logger, | ||
PrecType& prec, const BatchMatrixType& mat, | ||
const device_type<ValueType>* const __restrict__ b_values, | ||
device_type<ValueType>* const __restrict__ x_values, | ||
device_type<ValueType>* const __restrict__ workspace_data, | ||
const int& block_size, const size_t& shared_size); | ||
|
||
// Spells out the signature of one launch_apply_kernel instantiation for value | ||
// type _vtype with the given _n_shared / _prec_shared values and the matrix, | ||
// logger, preconditioner and stop templates mat_t, log_t, pre_t and stop_t. | ||
// Only the first four template arguments are written explicitly; the | ||
// remaining ones follow from the parameter types. | ||
// NOTE(review): the settings parameter is written with remove_complex<_vtype> | ||
// while the template declaration uses remove_complex<ValueType> with | ||
// ValueType = device_type<_vtype>, and the logger argument wraps an extra | ||
// device_type<...> compared with the BiCGSTAB counterpart of this macro. | ||
// These only name the same types if device_type leaves the real value types | ||
// unchanged - confirm for every instantiated value type. | ||
// NOTE(review): the expansion carries no leading `template` keyword; | ||
// presumably the GKO_INSTANTIATE_* helpers prepend it - confirm. | ||
#define GKO_DECLARE_BATCH_CG_LAUNCH(_vtype, _n_shared, _prec_shared, mat_t, \ | ||
log_t, pre_t, stop_t) \ | ||
void launch_apply_kernel<device_type<_vtype>, _n_shared, _prec_shared, \ | ||
stop_t<device_type<_vtype>>>( \ | ||
std::shared_ptr<const DefaultExecutor> exec, \ | ||
const gko::kernels::batch_cg::storage_config& sconf, \ | ||
const settings<remove_complex<_vtype>>& settings, \ | ||
log_t<device_type<gko::remove_complex<device_type<_vtype>>>>& logger, \ | ||
pre_t<device_type<_vtype>>& prec, \ | ||
const mat_t<const device_type<_vtype>>& mat, \ | ||
const device_type<_vtype>* const __restrict__ b_values, \ | ||
device_type<_vtype>* const __restrict__ x_values, \ | ||
device_type<_vtype>* const __restrict__ workspace_data, \ | ||
const int& block_size, const size_t& shared_size) | ||
|
||
// Forwards GKO_DECLARE_BATCH_CG_LAUNCH plus the caller's arguments to | ||
// GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS (defined elsewhere; presumably it | ||
// repeats the declaration once per supported value type - confirm). | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH(...) \ | ||
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS(GKO_DECLARE_BATCH_CG_LAUNCH, \ | ||
__VA_ARGS__) | ||
|
||
// One instantiation macro per (n_shared, prec_shared) combination: n_shared | ||
// 0 through 5 with prec_shared == false, plus n_shared 5 with prec_shared == | ||
// true. Each one hands GKO_INSTANTIATE_BATCH_CG_LAUNCH and the two values to | ||
// GKO_BATCH_INSTANTIATE_VARGS (defined elsewhere; presumably it supplies the | ||
// matrix/logger/preconditioner/stop combinations - confirm). | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_0_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_CG_LAUNCH, 0, false) | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_1_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_CG_LAUNCH, 1, false) | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_2_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_CG_LAUNCH, 2, false) | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_3_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_CG_LAUNCH, 3, false) | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_4_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_CG_LAUNCH, 4, false) | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_5_FALSE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_CG_LAUNCH, 5, false) | ||
#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_5_TRUE \ | ||
GKO_BATCH_INSTANTIATE_VARGS(GKO_INSTANTIATE_BATCH_CG_LAUNCH, 5, true) | ||
|
||
|
||
} // namespace batch_cg | ||
} // namespace GKO_DEVICE_NAMESPACE | ||
} // namespace kernels | ||
} // namespace gko |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors | ||
// | ||
// SPDX-License-Identifier: BSD-3-Clause | ||
|
||
#include "common/cuda_hip/solver/batch_cg_launch.hpp" | ||
|
||
#include <ginkgo/core/base/exception_helpers.hpp> | ||
|
||
#include "common/cuda_hip/solver/batch_cg_kernels.hpp" | ||
#include "core/matrix/batch_struct.hpp" | ||
#include "core/solver/batch_cg_kernels.hpp" | ||
#include "core/solver/batch_dispatch.hpp" | ||
|
||
|
||
namespace gko { | ||
namespace kernels { | ||
namespace GKO_DEVICE_NAMESPACE { | ||
namespace batch_cg { | ||
|
||
|
||
// Launches batch_single_kernels::apply_kernel<StopType, n_shared, prec_shared> | ||
// for the batched CG solver. | ||
// | ||
// The function only enqueues the kernel on the stream returned by | ||
// exec->get_stream(); it performs no synchronization or error check itself. | ||
// From settings, only max_iterations and residual_tol are passed on (the | ||
// tolerance after conversion with as_device_type); sconf, logger, prec, mat | ||
// and the three value pointers are forwarded unchanged. | ||
template <typename ValueType, int n_shared, bool prec_shared, typename StopType, | ||
typename PrecType, typename LogType, typename BatchMatrixType> | ||
void launch_apply_kernel( | ||
std::shared_ptr<const DefaultExecutor> exec, | ||
const gko::kernels::batch_cg::storage_config& sconf, | ||
const settings<remove_complex<ValueType>>& settings, LogType& logger, | ||
PrecType& prec, const BatchMatrixType& mat, | ||
const device_type<ValueType>* const __restrict__ b_values, | ||
device_type<ValueType>* const __restrict__ x_values, | ||
device_type<ValueType>* const __restrict__ workspace_data, | ||
const int& block_size, const size_t& shared_size) | ||
{ | ||
// Launch configuration: mat.num_batch_items blocks of block_size threads, | ||
// shared_size bytes of dynamic shared memory per block. | ||
batch_single_kernels::apply_kernel<StopType, n_shared, prec_shared> | ||
<<<mat.num_batch_items, block_size, shared_size, exec->get_stream()>>>( | ||
sconf, settings.max_iterations, | ||
as_device_type(settings.residual_tol), logger, prec, mat, b_values, | ||
x_values, workspace_data); | ||
} | ||
|
||
|
||
// Explicit instantiations of launch_apply_kernel, one macro per | ||
// (n_shared, prec_shared) combination declared in the launch header. | ||
// NOTE(review): the marker comments around the macros (begin, split, end) | ||
// look like input for the CMake file-splitting step this change relies on; | ||
// keep them exactly as written - confirm against the build scripts. | ||
// begin | ||
GKO_INSTANTIATE_BATCH_CG_LAUNCH_0_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_CG_LAUNCH_1_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_CG_LAUNCH_2_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_CG_LAUNCH_3_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_CG_LAUNCH_4_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_CG_LAUNCH_5_FALSE; | ||
// split | ||
GKO_INSTANTIATE_BATCH_CG_LAUNCH_5_TRUE; | ||
// end | ||
|
||
|
||
} // namespace batch_cg | ||
} // namespace GKO_DEVICE_NAMESPACE | ||
} // namespace kernels | ||
} // namespace gko |
Oops, something went wrong.