Skip to content

Commit

Permalink
Expose stream-ordering to strings attribute APIs (rapidsai#17398)
Browse files Browse the repository at this point in the history
Adds a `stream` parameter to the following strings attribute APIs:
```
cudf::strings::count_characters
cudf::strings::count_bytes
cudf::strings::code_points
```
Added stream gtests to verify correct stream forwarding.

Reference: rapidsai#13744

Authors:
  - Shruti Shivakumar (https://github.com/shrshi)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: rapidsai#17398
  • Loading branch information
shrshi authored Nov 25, 2024
1 parent 8d8cd78 commit d93e9c2
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 3 deletions.
6 changes: 6 additions & 0 deletions cpp/include/cudf/strings/attributes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ namespace strings {
* Any null string will result in a null entry for that row in the output column.
*
* @param input Strings instance for this operation
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with lengths for each string
*/
std::unique_ptr<column> count_characters(
strings_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),  // when omitted: cudf::get_default_stream()
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());  // when omitted: cudf::get_current_device_resource_ref()

/**
Expand All @@ -59,11 +61,13 @@ std::unique_ptr<column> count_characters(
* Any null string will result in a null entry for that row in the output column.
*
* @param input Strings instance for this operation
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with the number of bytes for each string
*/
std::unique_ptr<column> count_bytes(
strings_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),  // when omitted: cudf::get_default_stream()
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());  // when omitted: cudf::get_current_device_resource_ref()

/**
Expand All @@ -79,11 +83,13 @@ std::unique_ptr<column> count_bytes(
* Any null string is ignored. No null entries will appear in the output column.
*
* @param input Strings instance for this operation
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New INT32 column with code point integer values for each character
*/
std::unique_ptr<column> code_points(
strings_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),  // when omitted: cudf::get_default_stream()
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());  // when omitted: cudf::get_current_device_resource_ref()

/** @} */ // end of strings_apis group
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/strings/attributes.cu
Original file line number Diff line number Diff line change
Expand Up @@ -264,24 +264,27 @@ std::unique_ptr<column> code_points(strings_column_view const& input,
// external APIs

// Public API: thin wrapper around detail::count_characters.
//
// @param input  Strings column to process
// @param stream Stream handed to the detail implementation; it must be the
//               caller's stream rather than cudf::get_default_stream(), otherwise
//               the `stream` argument would be silently ignored
// @param mr     Memory resource handed to the detail implementation
// @return       Whatever detail::count_characters returns for these arguments
std::unique_ptr<column> count_characters(strings_column_view const& input,
                                         rmm::cuda_stream_view stream,
                                         rmm::device_async_resource_ref mr)
{
  CUDF_FUNC_RANGE();
  return detail::count_characters(input, stream, mr);
}

// Public API: thin wrapper around detail::count_bytes.
//
// @param input  Strings column to process
// @param stream Stream handed to the detail implementation; it must be the
//               caller's stream rather than cudf::get_default_stream(), otherwise
//               the `stream` argument would be silently ignored
// @param mr     Memory resource handed to the detail implementation
// @return       Whatever detail::count_bytes returns for these arguments
std::unique_ptr<column> count_bytes(strings_column_view const& input,
                                    rmm::cuda_stream_view stream,
                                    rmm::device_async_resource_ref mr)
{
  CUDF_FUNC_RANGE();
  return detail::count_bytes(input, stream, mr);
}

// Public API: thin wrapper around detail::code_points.
//
// @param input  Strings column to process
// @param stream Stream handed to the detail implementation; it must be the
//               caller's stream rather than cudf::get_default_stream(), otherwise
//               the `stream` argument would be silently ignored
// @param mr     Memory resource handed to the detail implementation
// @return       Whatever detail::code_points returns for these arguments
std::unique_ptr<column> code_points(strings_column_view const& input,
                                    rmm::cuda_stream_view stream,
                                    rmm::device_async_resource_ref mr)
{
  CUDF_FUNC_RANGE();
  return detail::code_points(input, stream, mr);
}

} // namespace strings
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,7 @@ ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing)
ConfigureTest(
STREAM_STRINGS_TEST
streams/strings/attributes_test.cpp
streams/strings/case_test.cpp
streams/strings/combine_test.cpp
streams/strings/contains_test.cpp
Expand Down
59 changes: 59 additions & 0 deletions cpp/tests/streams/strings/attributes_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>

#include <cudf/strings/attributes.hpp>
#include <cudf/strings/strings_column_view.hpp>

// Fixture shared by the strings attribute stream tests; adds nothing on top of
// cudf::test::BaseFixture.
struct StringsAttributesTest : cudf::test::BaseFixture {
};

// Calls cudf::strings::code_points with the test stream on a small column that
// contains a null row, an empty row and a non-ASCII row. The returned column is
// not inspected here.
TEST_F(StringsAttributesTest, CodePoints)
{
  // A nullptr entry marks the row that should be null in the device column.
  std::vector<char const*> host_rows{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"};

  auto const row_is_valid = [](auto row) { return row != nullptr; };
  auto validity           = thrust::make_transform_iterator(host_rows.begin(), row_is_valid);

  cudf::test::strings_column_wrapper input(host_rows.begin(), host_rows.end(), validity);
  auto input_view = cudf::strings_column_view(input);

  auto output = cudf::strings::code_points(input_view, cudf::test::get_default_stream());
}

// Calls cudf::strings::count_characters with the test stream on 40000 identical
// rows of a long string with non-ASCII characters. The returned column is not
// inspected here.
TEST_F(StringsAttributesTest, CountCharacters)
{
  std::string const row_text{"something a bit longer than 32 bytes ééé ééé ééé ééé ééé ééé ééé"};
  std::vector<std::string> host_rows(40000, row_text);

  cudf::test::strings_column_wrapper input(host_rows.begin(), host_rows.end());
  auto input_view = cudf::strings_column_view(input);

  auto output = cudf::strings::count_characters(input_view, cudf::test::get_default_stream());
}

// Calls cudf::strings::count_bytes with the test stream on a small column that
// contains a null row, an empty row, a non-ASCII row and one longer row. The
// returned column is not inspected here.
TEST_F(StringsAttributesTest, CountBytes)
{
  // A nullptr entry marks the row that should be null in the device column.
  std::vector<char const*> host_rows{
    "eee", "bb", nullptr, "", "aa", "ééé", "something a bit longer than 32 bytes"};

  auto const row_is_valid = [](auto row) { return row != nullptr; };
  auto validity           = thrust::make_transform_iterator(host_rows.begin(), row_is_valid);

  cudf::test::strings_column_wrapper input(host_rows.begin(), host_rows.end(), validity);
  auto input_view = cudf::strings_column_view(input);

  auto output = cudf::strings::count_bytes(input_view, cudf::test::get_default_stream());
}

0 comments on commit d93e9c2

Please sign in to comment.