Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve naming of common member functions #1400

Merged
merged 9 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
6 changes: 3 additions & 3 deletions benchmark/blas/blas_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,13 +343,13 @@ class PrefixSumOperation : public BenchmarkOperation {

gko::size_type get_memory() const override
{
return 2 * sizeof(IndexType) * array_.get_num_elems();
return 2 * sizeof(IndexType) * array_.get_size();
}

void run() override
{
array_.get_executor()->run(make_prefix_sum_nonnegative(
array_.get_data(), array_.get_num_elems()));
array_.get_executor()->run(
make_prefix_sum_nonnegative(array_.get_data(), array_.get_size()));
}

private:
Expand Down
12 changes: 6 additions & 6 deletions benchmark/sparse_blas/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ DEFINE_int32(
"Maximum distance for row swaps to avoid rows with disjoint column ranges");

DEFINE_string(spgemm_mode, "normal",
R"(Which matrix B should be used to compute A * B: normal,
R"(Which matrix B should be used to compute A * B: normal,
transposed, sparse, dense
normal: B = A for A square, A^T otherwise\ntransposed: B = A^T
sparse: B is a sparse matrix with dimensions of A^T with uniformly
sparse: B is a sparse matrix with dimensions of A^T with uniformly
random values, at most -spgemm_rowlength non-zeros per row
dense: B is a 'dense' sparse matrix with -spgemm_rowlength columns
and non-zeros per row)");
Expand Down Expand Up @@ -123,15 +123,15 @@ class SpgemmOperation : public BenchmarkOperation {
get_engine()));
}
}
data.ensure_row_major_order();
data.sort_row_major();
mtx2_ = Mtx::create(exec, size2);
mtx2_->read(data);
} else if (mode_str == "dense") {
const auto size2 = gko::dim<2>(size[1], FLAGS_spgemm_rowlength);
std::uniform_real_distribution<gko::remove_complex<etype>> dist(
-1.0, 1.0);
gko::matrix_data<etype, itype> data{size2, dist, get_engine()};
data.ensure_row_major_order();
data.sort_row_major();
mtx2_ = Mtx::create(exec, size2);
mtx2_->read(data);
} else {
Expand Down Expand Up @@ -433,7 +433,7 @@ class GenerateLookupOperation : public BenchmarkOperation {
// read sparsity pattern and row pointers once, write lookup structures
return mtx_->get_num_stored_elements() * sizeof(itype) +
mtx_->get_size()[0] * (2 * sizeof(itype) + sizeof(gko::int64)) +
storage_.get_num_elems() * sizeof(gko::int32);
storage_.get_size() * sizeof(gko::int32);
}

void run() override
Expand Down Expand Up @@ -518,7 +518,7 @@ class LookupOperation : public BenchmarkOperation {
// column index and write a result
return mtx_->get_size()[0] * (2 * sizeof(itype) + sizeof(gko::int64) +
sample_size_ * 2 * sizeof(itype)) +
storage_.get_num_elems() * sizeof(gko::int32);
storage_.get_size() * sizeof(gko::int32);
}

void run() override
Expand Down
2 changes: 1 addition & 1 deletion benchmark/tools/matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ int main(int argc, char** argv)
bool binary = std::string{argv[1]} == "-b";

auto data = gko::read_generic_raw<value_type, gko::int64>(std::cin);
data.ensure_row_major_order();
data.sort_row_major();
for (int argi = binary ? 2 : 1; argi < argc; argi++) {
std::string arg{argv[argi]};
if (arg == "lower-triangular") {
Expand Down
4 changes: 2 additions & 2 deletions benchmark/utils/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct DefaultSystemGenerator {
throw std::runtime_error(
"No known way to generate matrix data found.");
}
data.ensure_row_major_order();
data.sort_row_major();
return data;
}

Expand Down Expand Up @@ -181,7 +181,7 @@ struct DistributedDefaultSystemGenerator {
throw std::runtime_error(
"No known way to generate matrix data found.");
}
data.ensure_row_major_order();
data.sort_row_major();
return data;
}

Expand Down
8 changes: 4 additions & 4 deletions common/cuda_hip/base/device_matrix_data_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ void remove_zeros(std::shared_ptr<const DefaultExecutor> exec,
{
using device_value_type = device_type<ValueType>;
auto value_ptr = as_device_type(values.get_const_data());
auto size = values.get_num_elems();
auto size = values.get_size();
// count nonzeros
auto nnz = thrust::count_if(
thrust_policy(exec), value_ptr, value_ptr + size,
Expand Down Expand Up @@ -47,7 +47,7 @@ void sum_duplicates(std::shared_ptr<const DefaultExecutor> exec, size_type,
array<ValueType>& values, array<IndexType>& row_idxs,
array<IndexType>& col_idxs)
{
const auto size = values.get_num_elems();
const auto size = values.get_size();
const auto rows = row_idxs.get_const_data();
const auto cols = col_idxs.get_const_data();
auto iota = thrust::make_counting_iterator(size_type{});
Expand Down Expand Up @@ -92,8 +92,8 @@ void sort_row_major(std::shared_ptr<const DefaultExecutor> exec,
auto it = thrust::make_zip_iterator(
thrust::make_tuple(data.get_row_idxs(), data.get_col_idxs()));
auto vals = as_device_type(data.get_values());
thrust::sort_by_key(thrust_policy(exec), it, it + data.get_num_elems(),
vals);
thrust::sort_by_key(thrust_policy(exec), it,
it + data.get_num_stored_elements(), vals);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
10 changes: 5 additions & 5 deletions common/cuda_hip/base/kernel_launch_reduction.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ void run_kernel_reduction_cached(std::shared_ptr<const DefaultExecutor> exec,
ceildiv(size, block_size), exec->get_num_warps() * oversubscription);
if (num_blocks > 1) {
const auto required_storage = sizeof(ValueType) * num_blocks;
if (tmp.get_num_elems() < required_storage) {
if (tmp.get_size() < required_storage) {
tmp.resize_and_reset(required_storage);
}
generic_kernel_reduction_1d<<<num_blocks, block_size, 0,
Expand Down Expand Up @@ -143,7 +143,7 @@ void run_kernel_reduction_cached(std::shared_ptr<const DefaultExecutor> exec,
exec->get_num_warps() * oversubscription);
if (num_blocks > 1) {
const auto required_storage = sizeof(ValueType) * num_blocks;
if (tmp.get_num_elems() < required_storage) {
if (tmp.get_size() < required_storage) {
tmp.resize_and_reset(required_storage);
}
generic_kernel_reduction_2d<<<num_blocks, block_size, 0,
Expand Down Expand Up @@ -367,7 +367,7 @@ void run_generic_col_reduction_small(
as_device_type(result), args...);
} else {
const auto required_storage = sizeof(ValueType) * num_blocks * cols;
if (tmp.get_num_elems() < required_storage) {
if (tmp.get_size() < required_storage) {
tmp.resize_and_reset(required_storage);
}
generic_kernel_col_reduction_2d_small<subwarp_size>
Expand Down Expand Up @@ -413,7 +413,7 @@ void run_kernel_row_reduction_cached(
if (rows * cols > resources && rows < cols) {
const auto col_blocks = ceildiv(rows * cols, resources);
const auto required_storage = sizeof(ValueType) * col_blocks * rows;
if (tmp.get_num_elems() < required_storage) {
if (tmp.get_size() < required_storage) {
tmp.resize_and_reset(required_storage);
}
const auto num_blocks =
Expand Down Expand Up @@ -484,7 +484,7 @@ void run_kernel_col_reduction_cached(
as_device_type(result), map_to_device(args)...);
} else {
const auto required_storage = sizeof(ValueType) * row_blocks * cols;
if (tmp.get_num_elems() < required_storage) {
if (tmp.get_size() < required_storage) {
tmp.resize_and_reset(required_storage);
}
// no need to guard this kernel, as cols > warp_size, row_blocks > 1
Expand Down
12 changes: 6 additions & 6 deletions common/cuda_hip/distributed/matrix_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void build_local_nonlocal(
col_partition->get_range_starting_indices();
const auto num_row_ranges = row_partition->get_num_ranges();
const auto num_col_ranges = col_partition->get_num_ranges();
const auto num_input_elements = input.get_num_elems();
const auto num_input_elements = input.get_num_stored_elements();

// precompute the row and column range id of each input element
auto input_row_idxs = input.get_const_row_idxs();
Expand All @@ -62,7 +62,7 @@ void build_local_nonlocal(
row_range_bounds + num_row_ranges + 1, input_row_idxs,
input_row_idxs + num_input_elements,
row_range_ids.get_data());
array<size_type> col_range_ids{exec, input.get_num_elems()};
array<size_type> col_range_ids{exec, input.get_num_stored_elements()};
thrust::upper_bound(thrust_policy(exec), col_range_bounds + 1,
col_range_bounds + num_col_ranges + 1, input_col_idxs,
input_col_idxs + num_input_elements,
Expand Down Expand Up @@ -128,8 +128,8 @@ void build_local_nonlocal(
return thrust::make_tuple(local_row, local_col, input.val);
});
thrust::copy_if(
thrust_policy(exec), local_it, local_it + input.get_num_elems(),
range_ids_it,
thrust_policy(exec), local_it,
local_it + input.get_num_stored_elements(), range_ids_it,
thrust::make_zip_iterator(thrust::make_tuple(local_row_idxs.get_data(),
local_col_idxs.get_data(),
local_values.get_data())),
Expand Down Expand Up @@ -157,8 +157,8 @@ void build_local_nonlocal(
input.col_range);
});
thrust::copy_if(
thrust_policy(exec), non_local_it, non_local_it + input.get_num_elems(),
range_ids_it,
thrust_policy(exec), non_local_it,
non_local_it + input.get_num_stored_elements(), range_ids_it,
thrust::make_zip_iterator(thrust::make_tuple(
non_local_row_idxs.get_data(), non_local_global_col_idxs.get_data(),
non_local_values.get_data(), non_local_col_part_ids.get_data(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ void sort_by_range_start(
array<GlobalIndexType>& range_start_ends,
array<experimental::distributed::comm_index_type>& part_ids)
{
auto num_ranges = range_start_ends.get_num_elems() / 2;
auto num_ranges = range_start_ends.get_size() / 2;
auto strided_indices = thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
[] __host__ __device__(const int i) { return 2 * i; });
Expand Down
20 changes: 10 additions & 10 deletions common/cuda_hip/distributed/vector_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ void build_local(
const auto* part_ids = partition->get_part_ids();
const auto num_ranges = partition->get_num_ranges();

array<size_type> range_id{exec, input.get_num_elems()};
thrust::upper_bound(thrust_policy(exec), range_bounds + 1,
range_bounds + num_ranges + 1,
input.get_const_row_idxs(),
input.get_const_row_idxs() + input.get_num_elems(),
range_id.get_data(), thrust::less<GlobalIndexType>());
array<size_type> range_id{exec, input.get_num_stored_elements()};
thrust::upper_bound(
thrust_policy(exec), range_bounds + 1, range_bounds + num_ranges + 1,
input.get_const_row_idxs(),
input.get_const_row_idxs() + input.get_num_stored_elements(),
range_id.get_data(), thrust::less<GlobalIndexType>());

// write values with local rows into the local matrix at the correct index
// this needs the following iterators:
Expand Down Expand Up @@ -57,10 +57,10 @@ void build_local(
[part_ids, local_part] __host__ __device__(const size_type rid) {
return part_ids[rid] == local_part;
};
thrust::scatter_if(thrust_policy(exec), input.get_const_values(),
input.get_const_values() + input.get_num_elems(),
flat_idx_it, range_id.get_data(),
local_mtx->get_values(), is_local_row);
thrust::scatter_if(
thrust_policy(exec), input.get_const_values(),
input.get_const_values() + input.get_num_stored_elements(), flat_idx_it,
range_id.get_data(), local_mtx->get_values(), is_local_row);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(
Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/factorization/cholesky_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ void build_children_from_parents(
std::shared_ptr<const DefaultExecutor> exec,
gko::factorization::elimination_forest<IndexType>& forest)
{
const auto num_rows = forest.parents.get_num_elems();
const auto num_rows = forest.parents.get_size();
// build COO representation of the tree
array<IndexType> col_idx_array{exec, num_rows};
const auto col_idxs = col_idx_array.get_data();
Expand Down
4 changes: 2 additions & 2 deletions common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ void convert_to_fbcsr(std::shared_ptr<const DefaultExecutor> exec,
source->get_size()[0],
in_row_idxs.get_data());
auto block_row_ptrs = block_row_ptr_array.get_data();
auto num_block_rows = block_row_ptr_array.get_num_elems() - 1;
auto num_block_rows = block_row_ptr_array.get_size() - 1;
if (nnz == 0) {
components::fill_array(exec, block_row_ptrs, num_block_rows + 1,
IndexType{});
Expand Down Expand Up @@ -1032,7 +1032,7 @@ void convert_to_fbcsr(std::shared_ptr<const DefaultExecutor> exec,
});
// build row pointers from row indices
components::convert_idxs_to_ptrs(exec, block_row_idx_array.get_const_data(),
block_row_idx_array.get_num_elems(),
block_row_idx_array.get_size(),
num_block_rows, block_row_ptrs);
// fill in values
components::fill_array(exec, block_value_array.get_data(),
Expand Down
6 changes: 3 additions & 3 deletions common/cuda_hip/matrix/fbcsr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,10 @@ void fill_in_matrix_data(std::shared_ptr<const DefaultExecutor> exec,
array<ValueType>& block_value_array)
{
using tuple_type = thrust::tuple<IndexType, IndexType>;
const auto nnz = data.get_num_elems();
const auto nnz = data.get_num_stored_elements();
const auto bs = block_size;
auto block_row_ptrs = block_row_ptr_array.get_data();
auto num_block_rows = block_row_ptr_array.get_num_elems() - 1;
auto num_block_rows = block_row_ptr_array.get_size() - 1;
if (nnz == 0) {
components::fill_array(exec, block_row_ptrs, num_block_rows + 1,
IndexType{});
Expand Down Expand Up @@ -187,7 +187,7 @@ void fill_in_matrix_data(std::shared_ptr<const DefaultExecutor> exec,
});
// build row pointers from row indices
components::convert_idxs_to_ptrs(exec, block_row_idx_array.get_const_data(),
block_row_idx_array.get_num_elems(),
block_row_idx_array.get_size(),
num_block_rows, block_row_ptrs);
// fill in values
components::fill_array(exec, block_value_array.get_data(),
Expand Down
10 changes: 5 additions & 5 deletions common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,13 @@ void initialize_precisions(std::shared_ptr<const DefaultExecutor> exec,
array<precision_reduction>& precisions)
{
const auto block_size = default_num_warps * config::warp_size;
const auto grid_size = min(
default_grid_size,
static_cast<int32>(ceildiv(precisions.get_num_elems(), block_size)));
const auto grid_size =
min(default_grid_size,
static_cast<int32>(ceildiv(precisions.get_size(), block_size)));
if (grid_size > 0) {
duplicate_array<<<grid_size, block_size, 0, exec->get_stream()>>>(
source.get_const_data(), source.get_num_elems(),
precisions.get_data(), precisions.get_num_elems());
source.get_const_data(), source.get_size(), precisions.get_data(),
precisions.get_size());
}
}

Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/solver/multigrid_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ void kcycle_check_stop(std::shared_ptr<const DefaultExecutor> exec,
const ValueType rel_tol, bool& is_stop)
{
gko::array<bool> dis_stop(exec, 1);
components::fill_array(exec, dis_stop.get_data(), dis_stop.get_num_elems(),
components::fill_array(exec, dis_stop.get_data(), dis_stop.get_size(),
true);
const auto nrhs = new_norm->get_size()[1];
const auto grid = ceildiv(nrhs, default_block_size);
Expand Down
6 changes: 3 additions & 3 deletions common/unified/base/device_matrix_data_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ void soa_to_aos(std::shared_ptr<const DefaultExecutor> exec,
[] GKO_KERNEL(auto i, auto rows, auto cols, auto vals, auto out) {
out[i] = {rows[i], cols[i], vals[i]};
},
in.get_num_elems(), in.get_const_row_idxs(), in.get_const_col_idxs(),
in.get_const_values(), out);
in.get_num_stored_elements(), in.get_const_row_idxs(),
in.get_const_col_idxs(), in.get_const_values(), out);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand All @@ -48,7 +48,7 @@ void aos_to_soa(std::shared_ptr<const DefaultExecutor> exec,
cols[i] = in[i].column;
vals[i] = unpack_member(in[i].value);
},
in.get_num_elems(), in, out.get_row_idxs(), out.get_col_idxs(),
in.get_size(), in, out.get_row_idxs(), out.get_col_idxs(),
out.get_values());
}

Expand Down
2 changes: 1 addition & 1 deletion common/unified/base/index_set_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void compute_validity(std::shared_ptr<const DefaultExecutor> exec,
validity_array[elem] =
local_indices[elem] != invalid_index<IndexType>();
},
local_indices->get_num_elems(), *local_indices, *validity_array);
local_indices->get_size(), *local_indices, *validity_array);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
Expand Down
4 changes: 2 additions & 2 deletions common/unified/components/reduce_array_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ void reduce_add_array(std::shared_ptr<const DefaultExecutor> exec,
[] GKO_KERNEL(auto i, auto arr, auto result) {
return i == 0 ? (arr[i] + result[0]) : arr[i];
},
GKO_KERNEL_REDUCE_SUM(ValueType), result.get_data(),
arr.get_num_elems(), arr, result);
GKO_KERNEL_REDUCE_SUM(ValueType), result.get_data(), arr.get_size(),
arr, result);
}

GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_REDUCE_ADD_ARRAY_KERNEL);
Expand Down
4 changes: 2 additions & 2 deletions common/unified/distributed/partition_helpers_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ void check_consecutive_ranges(std::shared_ptr<const DefaultExecutor> exec,
bool& result)
{
array<uint32> result_uint32{exec, 1};
auto num_ranges = range_start_ends.get_num_elems() / 2;
auto num_ranges = range_start_ends.get_size() / 2;
// need additional guard because DPCPP doesn't return the initial value for
// empty inputs
if (num_ranges > 1) {
Expand Down Expand Up @@ -60,7 +60,7 @@ void compress_ranges(std::shared_ptr<const DefaultExecutor> exec,
}
offsets[i + 1] = start_ends[2 * i + 1];
},
range_offsets.get_num_elems() - 1, range_start_ends.get_const_data(),
range_offsets.get_size() - 1, range_start_ends.get_const_data(),
range_offsets.get_data());
}

Expand Down
Loading