Merge Fix cuda 11.0

This PR fixes a compiler issue that occurs with CUDA 11.0. The issue did not appear with CUDA 11.4, so the CI job that built with CUDA 11.4 is replaced with one that builds with CUDA 11.0. Related PR: #1729
ginkgo-project · Nov 20, 2024 · daa4b54 · daa4b54
2 parents 53bbc1d + 858d50a
commit daa4b54
Show file tree

Hide file tree

Showing 5 changed files with 29 additions and 21 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -92,12 +92,12 @@ trigger_pipeline:
 
 
 # cuda 11.4 and friends
-build/cuda114/nompi/gcc/cuda/release/shared:
+build/cuda110/nompi/gcc/cuda/release/shared:
   extends:
     - .build_and_test_template
     - .default_variables
     - .quick_test_condition
-    - .use_gko_cuda114-openmpi-gnu10-llvm12
+    - .use_gko_cuda110-mvapich-gnu9-llvm9
   variables:
     BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
@@ -648,7 +648,7 @@ cudamemcheck:
     - .before_script_template
     - .default_variables
     - .deploy_condition
-  image: ginkgohub/cuda:110-mvapich2-gnu9-llvm9-intel2020
+    - .use_gko_cuda110-mvapich-gnu9-llvm9
   tags:
     - private_ci
     - nvidia-gpu

diff --git a/.gitlab/image.yml b/.gitlab/image.yml
@@ -17,6 +17,12 @@
     - cpu
     - amdci
 
+.use_gko_cuda110-mvapich-gnu9-llvm9:
+  image: ginkgohub/cuda:110-mvapich2-gnu9-llvm9-intel2020
+  tags:
+    - private_ci
+    - nvidia-gpu
+
 .use_gko_cuda114-openmpi-gnu10-llvm12:
   image: ginkgohub/cuda:114-openmpi-gnu10-llvm12
   tags:

diff --git a/common/cuda_hip/factorization/factorization_helpers.hpp b/common/cuda_hip/factorization/factorization_helpers.hpp
@@ -16,9 +16,6 @@ namespace factorization {
 namespace helpers {
 
 
-using namespace ::gko::factorization;
-
-
 constexpr int default_block_size{512};
 
 
@@ -107,6 +104,5 @@ __global__ __launch_bounds__(default_block_size) void initialize_l(
 }  // namespace helpers
 }  // namespace factorization
 }  // namespace GKO_DEVICE_NAMESPACE
-
 }  // namespace kernels
-}  // namespace gko
+}  // namespace gko
diff --git a/common/cuda_hip/factorization/factorization_kernels.cpp b/common/cuda_hip/factorization/factorization_kernels.cpp
@@ -399,7 +399,12 @@ void initialize_l_u(std::shared_ptr<const DefaultExecutor> exec,
     const auto grid_dim = static_cast<uint32>(
         ceildiv(num_rows, static_cast<size_type>(block_size)));
 
+    using namespace gko::factorization;
+
     if (grid_dim > 0) {
+        auto l_closure = triangular_mtx_closure(
+            [] __device__(auto val) { return one(val); }, identity{});
+        auto u_closure = triangular_mtx_closure(identity{}, identity{});
         helpers::
             initialize_l_u<<<grid_dim, block_size, 0, exec->get_stream()>>>(
                 num_rows, system_matrix->get_const_row_ptrs(),
@@ -408,12 +413,7 @@ void initialize_l_u(std::shared_ptr<const DefaultExecutor> exec,
                 csr_l->get_const_row_ptrs(), csr_l->get_col_idxs(),
                 as_device_type(csr_l->get_values()),
                 csr_u->get_const_row_ptrs(), csr_u->get_col_idxs(),
-                as_device_type(csr_u->get_values()),
-                helpers::triangular_mtx_closure(
-                    [] __device__(auto val) { return one(val); },
-                    helpers::identity{}),
-                helpers::triangular_mtx_closure(helpers::identity{},
-                                                helpers::identity{}));
+                as_device_type(csr_u->get_values()), l_closure, u_closure);
     }
 }
 
@@ -460,13 +460,15 @@ void initialize_l(std::shared_ptr<const DefaultExecutor> exec,
         ceildiv(num_rows, static_cast<size_type>(block_size)));
 
     if (grid_dim > 0) {
+        using namespace gko::factorization;
+
         helpers::initialize_l<<<grid_dim, block_size, 0, exec->get_stream()>>>(
             num_rows, system_matrix->get_const_row_ptrs(),
             system_matrix->get_const_col_idxs(),
             as_device_type(system_matrix->get_const_values()),
             csr_l->get_const_row_ptrs(), csr_l->get_col_idxs(),
             as_device_type(csr_l->get_values()),
-            helpers::triangular_mtx_closure(
+            triangular_mtx_closure(
                 [diag_sqrt] __device__(auto val) {
                     if (diag_sqrt) {
                         val = sqrt(val);
@@ -476,7 +478,7 @@ void initialize_l(std::shared_ptr<const DefaultExecutor> exec,
                     }
                     return val;
                 },
-                helpers::identity{}));
+                identity{}));
     }
 }
 

diff --git a/common/cuda_hip/preconditioner/sor_kernels.cpp b/common/cuda_hip/preconditioner/sor_kernels.cpp
@@ -29,18 +29,20 @@ void initialize_weighted_l(
     auto inv_weight = one(weight) / weight;
 
     if (grid_dim > 0) {
+        using namespace gko::factorization;
+
         factorization::helpers::
             initialize_l<<<grid_dim, block_size, 0, exec->get_stream()>>>(
                 num_rows, system_matrix->get_const_row_ptrs(),
                 system_matrix->get_const_col_idxs(),
                 as_device_type(system_matrix->get_const_values()),
                 l_mtx->get_const_row_ptrs(), l_mtx->get_col_idxs(),
                 as_device_type(l_mtx->get_values()),
-                factorization::helpers::triangular_mtx_closure(
+                triangular_mtx_closure(
                     [inv_weight] __device__(auto val) {
                         return val * inv_weight;
                     },
-                    factorization::helpers::identity{}));
+                    identity{}));
     }
 }
 
@@ -65,6 +67,8 @@ void initialize_weighted_l_u(
         one(weight) / (static_cast<remove_complex<ValueType>>(2.0) - weight);
 
     if (grid_dim > 0) {
+        using namespace gko::factorization;
+
         factorization::helpers::
             initialize_l_u<<<grid_dim, block_size, 0, exec->get_stream()>>>(
                 num_rows, system_matrix->get_const_row_ptrs(),
@@ -74,12 +78,12 @@ void initialize_weighted_l_u(
                 as_device_type(l_mtx->get_values()),
                 u_mtx->get_const_row_ptrs(), u_mtx->get_col_idxs(),
                 as_device_type(u_mtx->get_values()),
-                factorization::helpers::triangular_mtx_closure(
+                triangular_mtx_closure(
                     [inv_weight] __device__(auto val) {
                         return val * inv_weight;
                     },
-                    factorization::helpers::identity{}),
-                factorization::helpers::triangular_mtx_closure(
+                    identity{}),
+                triangular_mtx_closure(
                     [inv_two_minus_weight] __device__(auto val) {
                         return val * inv_two_minus_weight;
                     },