Skip to content

Commit

Permalink
Merge Fix cuda 11.0
Browse files Browse the repository at this point in the history
This PR fixes a compiler issue with CUDA 11.0. The issue did not appear with CUDA 11.4, so I replaced the CUDA 11.4 CI job with a CUDA 11.0 job.

Related PR: #1729
  • Loading branch information
MarcelKoch authored Nov 20, 2024
2 parents 53bbc1d + 858d50a commit daa4b54
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 21 deletions.
6 changes: 3 additions & 3 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,12 @@ trigger_pipeline:
# cuda 11.4 and friends
build/cuda114/nompi/gcc/cuda/release/shared:
build/cuda110/nompi/gcc/cuda/release/shared:
extends:
- .build_and_test_template
- .default_variables
- .quick_test_condition
- .use_gko_cuda114-openmpi-gnu10-llvm12
- .use_gko_cuda110-mvapich-gnu9-llvm9
variables:
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
Expand Down Expand Up @@ -648,7 +648,7 @@ cudamemcheck:
- .before_script_template
- .default_variables
- .deploy_condition
image: ginkgohub/cuda:110-mvapich2-gnu9-llvm9-intel2020
- .use_gko_cuda110-mvapich-gnu9-llvm9
tags:
- private_ci
- nvidia-gpu
Expand Down
6 changes: 6 additions & 0 deletions .gitlab/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
- cpu
- amdci

.use_gko_cuda110-mvapich-gnu9-llvm9:
image: ginkgohub/cuda:110-mvapich2-gnu9-llvm9-intel2020
tags:
- private_ci
- nvidia-gpu

.use_gko_cuda114-openmpi-gnu10-llvm12:
image: ginkgohub/cuda:114-openmpi-gnu10-llvm12
tags:
Expand Down
6 changes: 1 addition & 5 deletions common/cuda_hip/factorization/factorization_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ namespace factorization {
namespace helpers {


using namespace ::gko::factorization;


constexpr int default_block_size{512};


Expand Down Expand Up @@ -107,6 +104,5 @@ __global__ __launch_bounds__(default_block_size) void initialize_l(
} // namespace helpers
} // namespace factorization
} // namespace GKO_DEVICE_NAMESPACE

} // namespace kernels
} // namespace gko
} // namespace gko
18 changes: 10 additions & 8 deletions common/cuda_hip/factorization/factorization_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,12 @@ void initialize_l_u(std::shared_ptr<const DefaultExecutor> exec,
const auto grid_dim = static_cast<uint32>(
ceildiv(num_rows, static_cast<size_type>(block_size)));

using namespace gko::factorization;

if (grid_dim > 0) {
auto l_closure = triangular_mtx_closure(
[] __device__(auto val) { return one(val); }, identity{});
auto u_closure = triangular_mtx_closure(identity{}, identity{});
helpers::
initialize_l_u<<<grid_dim, block_size, 0, exec->get_stream()>>>(
num_rows, system_matrix->get_const_row_ptrs(),
Expand All @@ -408,12 +413,7 @@ void initialize_l_u(std::shared_ptr<const DefaultExecutor> exec,
csr_l->get_const_row_ptrs(), csr_l->get_col_idxs(),
as_device_type(csr_l->get_values()),
csr_u->get_const_row_ptrs(), csr_u->get_col_idxs(),
as_device_type(csr_u->get_values()),
helpers::triangular_mtx_closure(
[] __device__(auto val) { return one(val); },
helpers::identity{}),
helpers::triangular_mtx_closure(helpers::identity{},
helpers::identity{}));
as_device_type(csr_u->get_values()), l_closure, u_closure);
}
}

Expand Down Expand Up @@ -460,13 +460,15 @@ void initialize_l(std::shared_ptr<const DefaultExecutor> exec,
ceildiv(num_rows, static_cast<size_type>(block_size)));

if (grid_dim > 0) {
using namespace gko::factorization;

helpers::initialize_l<<<grid_dim, block_size, 0, exec->get_stream()>>>(
num_rows, system_matrix->get_const_row_ptrs(),
system_matrix->get_const_col_idxs(),
as_device_type(system_matrix->get_const_values()),
csr_l->get_const_row_ptrs(), csr_l->get_col_idxs(),
as_device_type(csr_l->get_values()),
helpers::triangular_mtx_closure(
triangular_mtx_closure(
[diag_sqrt] __device__(auto val) {
if (diag_sqrt) {
val = sqrt(val);
Expand All @@ -476,7 +478,7 @@ void initialize_l(std::shared_ptr<const DefaultExecutor> exec,
}
return val;
},
helpers::identity{}));
identity{}));
}
}

Expand Down
14 changes: 9 additions & 5 deletions common/cuda_hip/preconditioner/sor_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,20 @@ void initialize_weighted_l(
auto inv_weight = one(weight) / weight;

if (grid_dim > 0) {
using namespace gko::factorization;

factorization::helpers::
initialize_l<<<grid_dim, block_size, 0, exec->get_stream()>>>(
num_rows, system_matrix->get_const_row_ptrs(),
system_matrix->get_const_col_idxs(),
as_device_type(system_matrix->get_const_values()),
l_mtx->get_const_row_ptrs(), l_mtx->get_col_idxs(),
as_device_type(l_mtx->get_values()),
factorization::helpers::triangular_mtx_closure(
triangular_mtx_closure(
[inv_weight] __device__(auto val) {
return val * inv_weight;
},
factorization::helpers::identity{}));
identity{}));
}
}

Expand All @@ -65,6 +67,8 @@ void initialize_weighted_l_u(
one(weight) / (static_cast<remove_complex<ValueType>>(2.0) - weight);

if (grid_dim > 0) {
using namespace gko::factorization;

factorization::helpers::
initialize_l_u<<<grid_dim, block_size, 0, exec->get_stream()>>>(
num_rows, system_matrix->get_const_row_ptrs(),
Expand All @@ -74,12 +78,12 @@ void initialize_weighted_l_u(
as_device_type(l_mtx->get_values()),
u_mtx->get_const_row_ptrs(), u_mtx->get_col_idxs(),
as_device_type(u_mtx->get_values()),
factorization::helpers::triangular_mtx_closure(
triangular_mtx_closure(
[inv_weight] __device__(auto val) {
return val * inv_weight;
},
factorization::helpers::identity{}),
factorization::helpers::triangular_mtx_closure(
identity{}),
triangular_mtx_closure(
[inv_two_minus_weight] __device__(auto val) {
return val * inv_two_minus_weight;
},
Expand Down

0 comments on commit daa4b54

Please sign in to comment.