diff --git a/cpp/benchmarks/join/distinct_join.cu b/cpp/benchmarks/join/distinct_join.cu index 3502cbcea2a..1085b03ac7b 100644 --- a/cpp/benchmarks/join/distinct_join.cu +++ b/cpp/benchmarks/join/distinct_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,13 +23,8 @@ void distinct_inner_join(nvbench::state& state, auto join = [](cudf::table_view const& probe_input, cudf::table_view const& build_input, cudf::null_equality compare_nulls) { - auto const has_nulls = - cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - auto hj_obj = cudf::distinct_hash_join{ - build_input, probe_input, has_nulls, compare_nulls}; - return hj_obj.inner_join(); + auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls}; + return hj_obj.inner_join(probe_input); }; BM_join(state, join); @@ -42,13 +37,8 @@ void distinct_left_join(nvbench::state& state, auto join = [](cudf::table_view const& probe_input, cudf::table_view const& build_input, cudf::null_equality compare_nulls) { - auto const has_nulls = - cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - auto hj_obj = cudf::distinct_hash_join{ - build_input, probe_input, has_nulls, compare_nulls}; - return hj_obj.left_join(); + auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls}; + return hj_obj.left_join(probe_input); }; BM_join(state, join); diff --git a/cpp/include/cudf/detail/distinct_hash_join.cuh b/cpp/include/cudf/detail/distinct_hash_join.cuh index 2acc10105cf..9a10163eb15 100644 --- a/cpp/include/cudf/detail/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/distinct_hash_join.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,19 +36,24 @@ using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; /** - * @brief An comparator adapter wrapping both self comparator and two table comparator + * @brief A custom comparator used for the build table insertion */ -template -struct comparator_adapter { - comparator_adapter(Equal const& d_equal) : _d_equal{d_equal} {} - - __device__ constexpr auto operator()( +struct always_not_equal { + __device__ constexpr bool operator()( cuco::pair const&, cuco::pair const&) const noexcept { // All build table keys are distinct thus `false` no matter what return false; } +}; + +/** + * @brief An comparator adapter wrapping the two table comparator + */ +template +struct comparator_adapter { + comparator_adapter(Equal const& d_equal) : _d_equal{d_equal} {} __device__ constexpr auto operator()( cuco::pair const& lhs, @@ -62,56 +67,14 @@ struct comparator_adapter { Equal _d_equal; }; -template -struct hasher_adapter { - hasher_adapter(Hasher const& d_hasher = {}) : _d_hasher{d_hasher} {} - - template - __device__ constexpr auto operator()(cuco::pair const& key) const noexcept - { - return _d_hasher(key.first); - } - - private: - Hasher _d_hasher; -}; - /** * @brief Distinct hash join that builds hash table in creation and probes results in subsequent * `*_join` member functions. * - * @tparam HasNested Flag indicating whether there are nested columns in build/probe table + * This class enables the distinct hash join scheme that builds hash table once, and probes as many + * times as needed (possibly in parallel). */ -template -struct distinct_hash_join { - private: - /// Device row equal type - using d_equal_type = cudf::experimental::row::equality::strong_index_comparator_adapter< - cudf::experimental::row::equality::device_row_comparator>; - using hasher = hasher_adapter>; - using probing_scheme_type = cuco::linear_probing<1, hasher>; - using cuco_storage_type = cuco::storage<1>; - - /// Hash table type - using hash_table_type = cuco::static_set, - cuco::extent, - cuda::thread_scope_device, - comparator_adapter, - probing_scheme_type, - cudf::detail::cuco_allocator, - cuco_storage_type>; - - bool _has_nulls; ///< true if nulls are present in either build table or probe table - cudf::null_equality _nulls_equal; ///< whether to consider nulls as equal - cudf::table_view _build; ///< input table to build the hash map - cudf::table_view _probe; ///< input table to probe the hash map - std::shared_ptr - _preprocessed_build; ///< input table preprocssed for row operators - std::shared_ptr - _preprocessed_probe; ///< input table preprocssed for row operators - hash_table_type _hash_table; ///< hash table built on `_build` - +class distinct_hash_join { public: distinct_hash_join() = delete; ~distinct_hash_join() = default; @@ -120,21 +83,28 @@ struct distinct_hash_join { distinct_hash_join& operator=(distinct_hash_join const&) = delete; distinct_hash_join& operator=(distinct_hash_join&&) = delete; + /** + * @brief Hasher adapter used by distinct hash join + */ + struct hasher { + template + __device__ constexpr hash_value_type operator()( + cuco::pair const& key) const noexcept + { + return key.first; + } + }; + /** * @brief Constructor that internally builds the hash table based on the given `build` table. * * @throw cudf::logic_error if the number of columns in `build` table is 0. * * @param build The build table, from which the hash table is built - * @param probe The probe table - * @param has_nulls Flag to indicate if any nulls exist in the `build` table or - * any `probe` table that will be used later for join. * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. */ distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - bool has_nulls, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream); @@ -143,12 +113,36 @@ struct distinct_hash_join { */ std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; + inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::distinct_hash_join::left_join */ std::unique_ptr> left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; + + private: + using probing_scheme_type = cuco::linear_probing<1, hasher>; + using cuco_storage_type = cuco::storage<1>; + + /// Hash table type + using hash_table_type = cuco::static_set, + cuco::extent, + cuda::thread_scope_device, + always_not_equal, + probing_scheme_type, + cudf::detail::cuco_allocator, + cuco_storage_type>; + + bool _has_nested_columns; ///< True if nested columns are present in build and probe tables + cudf::null_equality _nulls_equal; ///< Whether to consider nulls as equal + cudf::table_view _build; ///< Input table to build the hash map + std::shared_ptr + _preprocessed_build; ///< Input table preprocssed for row operators + hash_table_type _hash_table; ///< Hash table built on `_build` }; } // namespace cudf::detail diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index afefd04d4fa..cc63565eee1 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,13 +34,6 @@ namespace CUDF_EXPORT cudf { -/** - * @brief Enum to indicate whether the distinct join table has nested columns or not - * - * @ingroup column_join - */ -enum class has_nested : bool { YES, NO }; - // forward declaration namespace hashing::detail { @@ -61,7 +54,6 @@ class hash_join; /** * @brief Forward declaration for our distinct hash join */ -template class distinct_hash_join; } // namespace detail @@ -469,20 +461,19 @@ class hash_join { rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: - const std::unique_ptr _impl; + std::unique_ptr _impl; }; /** * @brief Distinct hash join that builds hash table in creation and probes results in subsequent * `*_join` member functions * + * This class enables the distinct hash join scheme that builds hash table once, and probes as many + * times as needed (possibly in parallel). + * * @note Behavior is undefined if the build table contains duplicates. * @note All NaNs are considered as equal - * - * @tparam HasNested Flag indicating whether there are nested columns in build/probe table */ -// TODO: `HasNested` to be removed via dispatching -template class distinct_hash_join { public: distinct_hash_join() = delete; @@ -496,15 +487,10 @@ class distinct_hash_join { * @brief Constructs a distinct hash join object for subsequent probe calls * * @param build The build table that contains distinct elements - * @param probe The probe table, from which the keys are probed - * @param has_nulls Flag to indicate if there exists any nulls in the `build` table or - * any `probe` table that will be used later for join * @param compare_nulls Controls whether null join-key values should match or not * @param stream CUDA stream used for device memory operations and kernel launches */ distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls = nullable_join::YES, null_equality compare_nulls = null_equality::EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream()); @@ -512,16 +498,18 @@ class distinct_hash_join { * @brief Returns the row indices that can be used to construct the result of performing * an inner join between two tables. @see cudf::inner_join(). * + * @param probe The probe table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned indices' device memory. * - * @return A pair of columns [`build_indices`, `probe_indices`] that can be used to + * @return A pair of columns [`probe_indices`, `build_indices`] that can be used to * construct the result of performing an inner join between two tables * with `build` and `probe` as the join keys. */ [[nodiscard]] std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream = cudf::get_default_stream(), + inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** @@ -532,19 +520,22 @@ class distinct_hash_join { * the row index of the matched row from the build table if there is a match. Otherwise, contains * `JoinNoneValue`. * + * @param probe The probe table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. + * * @return A `build_indices` column that can be used to construct the result of * performing a left join between two tables with `build` and `probe` as the join * keys. */ [[nodiscard]] std::unique_ptr> left_join( + cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: - using impl_type = typename cudf::detail::distinct_hash_join; ///< Implementation type + using impl_type = cudf::detail::distinct_hash_join; ///< Implementation type std::unique_ptr _impl; ///< Distinct hash join implementation }; diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index ce4d2067b82..d1a01ee76e4 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,28 +47,19 @@ namespace cudf { namespace detail { namespace { -template -auto prepare_device_equal( - std::shared_ptr build, - std::shared_ptr probe, - bool has_nulls, - cudf::null_equality compare_nulls) -{ - auto const two_table_equal = - cudf::experimental::row::equality::two_table_comparator(probe, build); - return comparator_adapter{two_table_equal.equal_to( - nullate::DYNAMIC{has_nulls}, compare_nulls)}; -} +bool constexpr has_nulls = true; ///< Always has nulls /** * @brief Device functor to create a pair of {hash_value, row_index} for a given row. - * - * @tparam Hasher The type of internal hasher to compute row hash. */ -template +template class build_keys_fn { + using hasher = + cudf::experimental::row::hash::device_row_hasher; + public: - CUDF_HOST_DEVICE build_keys_fn(Hasher const& hash) : _hash{hash} {} + CUDF_HOST_DEVICE constexpr build_keys_fn(hasher const& hash) : _hash{hash} {} __device__ __forceinline__ auto operator()(size_type i) const noexcept { @@ -76,7 +67,7 @@ class build_keys_fn { } private: - Hasher _hash; + hasher _hash; }; /** @@ -92,26 +83,19 @@ struct output_fn { }; } // namespace -template -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - bool has_nulls, - cudf::null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _has_nulls{has_nulls}, +distinct_hash_join::distinct_hash_join(cudf::table_view const& build, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _has_nested_columns{cudf::has_nested_columns(build)}, _nulls_equal{compare_nulls}, _build{build}, - _probe{probe}, _preprocessed_build{ cudf::experimental::row::equality::preprocessed_table::create(_build, stream)}, - _preprocessed_probe{ - cudf::experimental::row::equality::preprocessed_table::create(_probe, stream)}, _hash_table{build.num_rows(), CUCO_DESIRED_LOAD_FACTOR, cuco::empty_key{cuco::pair{std::numeric_limits::max(), rhs_index_type{JoinNoneValue}}}, - prepare_device_equal( - _preprocessed_build, _preprocessed_probe, has_nulls, compare_nulls), + always_not_equal{}, {}, cuco::thread_scope_device, cuco_storage_type{}, @@ -124,10 +108,10 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, if (this->_build.num_rows() == 0) { return; } auto const row_hasher = experimental::row::hash::row_hasher{this->_preprocessed_build}; - auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); + auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_hasher}); + auto const iter = + cudf::detail::make_counting_transform_iterator(0, build_keys_fn{d_hasher}); size_type const build_table_num_rows{build.num_rows()}; if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(this->_build))) { @@ -146,15 +130,15 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, } } -template std::pair>, std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::inner_join"}; - size_type const probe_table_num_rows{this->_probe.num_rows()}; + size_type const probe_table_num_rows{probe.num_rows()}; // If output size is zero, return immediately if (probe_table_num_rows == 0) { @@ -162,25 +146,62 @@ distinct_hash_join::inner_join(rmm::cuda_stream_view stream, std::make_unique>(0, stream, mr)); } + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_probe, _preprocessed_build); + auto build_indices = std::make_unique>(probe_table_num_rows, stream, mr); auto probe_indices = std::make_unique>(probe_table_num_rows, stream, mr); - auto const probe_row_hasher = - cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto const probe_row_hasher = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_probe_hasher}); auto found_indices = rmm::device_uvector(probe_table_num_rows, stream); auto const found_begin = thrust::make_transform_output_iterator(found_indices.begin(), output_fn{}); - // TODO conditional find for nulls once `cuco::static_set::find_if` is added - // If `idx` is within the range `[0, probe_table_num_rows)` and `found_indices[idx]` is not equal - // to `JoinNoneValue`, then `idx` has a match in the hash set. - this->_hash_table.find_async(iter, iter + probe_table_num_rows, found_begin, stream.value()); + auto const comparator_helper = [&](auto device_comparator) { + // If `idx` is within the range `[0, probe_table_num_rows)` and `found_indices[idx]` is not + // equal to `JoinNoneValue`, then `idx` has a match in the hash set. + if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(probe))) { + this->_hash_table.find_async(iter, + iter + probe_table_num_rows, + comparator_adapter{device_comparator}, + hasher{}, + found_begin, + stream.value()); + } else { + auto stencil = thrust::counting_iterator{0}; + auto const row_bitmask = + cudf::detail::bitmask_and(probe, stream, cudf::get_current_device_resource_ref()).first; + auto const pred = + cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; + + this->_hash_table.find_if_async(iter, + iter + probe_table_num_rows, + stencil, + pred, + comparator_adapter{device_comparator}, + hasher{}, + found_begin, + stream.value()); + } + }; + + if (_has_nested_columns) { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } auto const tuple_iter = cudf::detail::make_counting_transform_iterator( 0, @@ -203,16 +224,17 @@ distinct_hash_join::inner_join(rmm::cuda_stream_view stream, build_indices->resize(actual_size, stream); probe_indices->resize(actual_size, stream); - return {std::move(build_indices), std::move(probe_indices)}; + return {std::move(probe_indices), std::move(build_indices)}; } -template -std::unique_ptr> distinct_hash_join::left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const +std::unique_ptr> distinct_hash_join::left_join( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::left_join"}; - size_type const probe_table_num_rows{this->_probe.num_rows()}; + size_type const probe_table_num_rows{probe.num_rows()}; // If output size is zero, return empty if (probe_table_num_rows == 0) { @@ -227,80 +249,82 @@ std::unique_ptr> distinct_hash_join::l thrust::fill( rmm::exec_policy_nosync(stream), build_indices->begin(), build_indices->end(), JoinNoneValue); } else { - auto const probe_row_hasher = - cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_probe, _preprocessed_build); + + auto const probe_row_hasher = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_probe_hasher}); auto const output_begin = thrust::make_transform_output_iterator(build_indices->begin(), output_fn{}); - // TODO conditional find for nulls once `cuco::static_set::find_if` is added - this->_hash_table.find_async(iter, iter + probe_table_num_rows, output_begin, stream.value()); + auto const comparator_helper = [&](auto device_comparator) { + if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(probe))) { + this->_hash_table.find_async(iter, + iter + probe_table_num_rows, + comparator_adapter{device_comparator}, + hasher{}, + output_begin, + stream.value()); + } else { + auto stencil = thrust::counting_iterator{0}; + auto const row_bitmask = + cudf::detail::bitmask_and(probe, stream, cudf::get_current_device_resource_ref()).first; + auto const pred = + cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; + + this->_hash_table.find_if_async(iter, + iter + probe_table_num_rows, + stencil, + pred, + comparator_adapter{device_comparator}, + hasher{}, + output_begin, + stream.value()); + } + }; + + if (_has_nested_columns) { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } } return build_indices; } } // namespace detail -template <> -distinct_hash_join::~distinct_hash_join() = default; - -template <> -distinct_hash_join::~distinct_hash_join() = default; - -template <> -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _impl{std::make_unique( - build, probe, has_nulls == nullable_join::YES, compare_nulls, stream)} -{ -} - -template <> -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _impl{std::make_unique( - build, probe, has_nulls == nullable_join::YES, compare_nulls, stream)} -{ -} +distinct_hash_join::~distinct_hash_join() = default; -template <> -std::pair>, - std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::distinct_hash_join(cudf::table_view const& build, + null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _impl{std::make_unique(build, compare_nulls, stream)} { - return _impl->inner_join(stream, mr); } -template <> std::pair>, std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const -{ - return _impl->inner_join(stream, mr); -} - -template <> -std::unique_ptr> -distinct_hash_join::left_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { - return _impl->left_join(stream, mr); + return _impl->inner_join(probe, stream, mr); } -template <> -std::unique_ptr> distinct_hash_join::left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const +std::unique_ptr> distinct_hash_join::left_join( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { - return _impl->left_join(stream, mr); + return _impl->left_join(probe, stream, mr); } } // namespace cudf diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 9070efa38fe..e1ec8cda3ac 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ struct DistinctJoinTest : public cudf::test::BaseFixture { cudf::table_view const& expected_table, cudf::out_of_bounds_policy oob_policy = cudf::out_of_bounds_policy::DONT_CHECK) { - auto const& [build_join_indices, probe_join_indices] = result; + auto const& [probe_join_indices, build_join_indices] = result; auto build_indices_span = cudf::device_span{*build_join_indices}; auto probe_indices_span = cudf::device_span{*probe_join_indices}; @@ -89,10 +89,9 @@ TEST_F(DistinctJoinTest, IntegerInnerJoin) auto build_table = cudf::table_view{{build->view()}}; auto probe_table = cudf::table_view{{probe->view()}}; - auto distinct_join = cudf::distinct_hash_join{ - build_table, probe_table, cudf::nullable_join::NO}; + auto distinct_join = cudf::distinct_hash_join{build_table}; - auto result = distinct_join.inner_join(); + auto result = distinct_join.inner_join(probe_table); auto constexpr gold_size = size / 2; auto gold = cudf::sequence(gold_size, init, cudf::numeric_scalar{2}); @@ -120,8 +119,8 @@ TEST_F(DistinctJoinTest, InnerJoinNoNulls) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{1, 2}}; strcol_wrapper col_gold_1({"s0", "s0"}); @@ -162,8 +161,8 @@ TEST_F(DistinctJoinTest, InnerJoinWithNulls) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {true, true}); @@ -229,8 +228,8 @@ TEST_F(DistinctJoinTest, InnerJoinWithStructsAndNulls) Table probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {true, true}); @@ -284,8 +283,8 @@ TEST_F(DistinctJoinTest, EmptyBuildTableInnerJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); this->compare_to_reference(build.view(), probe.view(), result, build.view()); } @@ -307,9 +306,9 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -332,8 +331,8 @@ TEST_F(DistinctJoinTest, EmptyProbeTableInnerJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); this->compare_to_reference(build.view(), probe.view(), result, probe.view()); } @@ -355,9 +354,9 @@ TEST_F(DistinctJoinTest, EmptyProbeTableLeftJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -391,9 +390,9 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) cols_gold.push_back(col_gold_3.release()); Table gold(std::move(cols_gold)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -416,9 +415,9 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) Table probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {true, true, false, true, true}); @@ -461,9 +460,9 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) Table probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; auto col0_gold_names_col = strcol_wrapper{ "Samuel Vimes", "Detritus", "Carrot Ironfoundersson", "Samuel Vimes", "Angua von Überwald"}; diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 1f8b1ea207d..ed35f35794d 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2901,16 +2901,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftDistinctJoinGatherMap j_right_keys, compare_nulls_equal, [](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) { - auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - if (cudf::has_nested_columns(right)) { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - return hash.left_join(); - } else { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - return hash.left_join(); - } + cudf::distinct_hash_join hash(right, nulleq); + return hash.left_join(left); }); } @@ -3119,22 +3111,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerDistinctJoinGatherMa j_right_keys, compare_nulls_equal, [](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) { - auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - std::pair>, - std::unique_ptr>> - maps; - if (cudf::has_nested_columns(right)) { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - maps = hash.inner_join(); - } else { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - maps = hash.inner_join(); - } - // Unique join returns {right map, left map} but all the other joins - // return {left map, right map}. Swap here to make it consistent. - return std::make_pair(std::move(maps.second), std::move(maps.first)); + cudf::distinct_hash_join hash(right, nulleq); + return hash.inner_join(left); }); }