Skip to content

Commit

Permalink
Add strings.convert.convert_fixed_point APIs to pylibcudf (rapidsai#16984)
Browse files Browse the repository at this point in the history
Contributes to rapidsai#15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: rapidsai#16984
  • Loading branch information
mroeschke authored Oct 4, 2024
1 parent a8da1ff commit 119aa9d
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 59 deletions.
69 changes: 17 additions & 52 deletions python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

import cudf

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf.core.buffer import acquire_spill_lock

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.strings.convert.convert_fixed_point cimport (
from_fixed_point as cpp_from_fixed_point,
is_fixed_point as cpp_is_fixed_point,
to_fixed_point as cpp_to_fixed_point,
)
from pylibcudf.libcudf.types cimport data_type, type_id

from cudf._lib.column cimport Column
from cudf._lib.types cimport dtype_to_pylibcudf_type

import pylibcudf as plc


@acquire_spill_lock()
Expand All @@ -32,14 +21,10 @@ def from_decimal(Column input_col):
-------
A column of strings representing the input decimal values.
"""
cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_from_fixed_point(
input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_fixed_point.from_fixed_point(
input_col.to_pylibcudf(mode="read"),
)
return Column.from_pylibcudf(plc_column)


@acquire_spill_lock()
Expand All @@ -57,25 +42,11 @@ def to_decimal(Column input_col, object out_type):
-------
A column of decimals parsed from the string values.
"""
cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
cdef int scale = out_type.scale
cdef data_type c_out_type
if isinstance(out_type, cudf.Decimal32Dtype):
c_out_type = data_type(type_id.DECIMAL32, -scale)
elif isinstance(out_type, cudf.Decimal64Dtype):
c_out_type = data_type(type_id.DECIMAL64, -scale)
elif isinstance(out_type, cudf.Decimal128Dtype):
c_out_type = data_type(type_id.DECIMAL128, -scale)
else:
raise TypeError("should be a decimal dtype")
with nogil:
c_result = move(
cpp_to_fixed_point(
input_column_view,
c_out_type))

result = Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_fixed_point.to_fixed_point(
input_col.to_pylibcudf(mode="read"),
dtype_to_pylibcudf_type(out_type),
)
result = Column.from_pylibcudf(plc_column)
result.dtype.precision = out_type.precision
return result

Expand All @@ -98,14 +69,8 @@ def is_fixed_point(Column input_col, object dtype):
-------
A Column of booleans indicating valid decimal conversion.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = input_col.view()
cdef int scale = dtype.scale
cdef data_type c_dtype = data_type(type_id.DECIMAL64, -scale)
with nogil:
c_result = move(cpp_is_fixed_point(
source_view,
c_dtype
))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_fixed_point.is_fixed_point(
input_col.to_pylibcudf(mode="read"),
dtype_to_pylibcudf_type(dtype),
)
return Column.from_pylibcudf(plc_column)
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ from pylibcudf.libcudf.types cimport data_type
cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \
"cudf::strings" nogil:
cdef unique_ptr[column] to_fixed_point(
column_view input_col,
column_view input,
data_type output_type) except +

cdef unique_ptr[column] from_fixed_point(
column_view input_col) except +
column_view input) except +

cdef unique_ptr[column] is_fixed_point(
column_view source_strings,
data_type output_type
column_view input,
data_type decimal_type
) except +
4 changes: 3 additions & 1 deletion python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
# the License.
# =============================================================================

set(cython_sources convert_booleans.pyx convert_durations.pyx convert_datetime.pyx)
set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx
convert_fixed_point.pyx
)

set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
Expand Down
7 changes: 6 additions & 1 deletion python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from . cimport convert_booleans, convert_datetime, convert_durations
from . cimport (
convert_booleans,
convert_datetime,
convert_durations,
convert_fixed_point,
)
7 changes: 6 additions & 1 deletion python/pylibcudf/pylibcudf/strings/convert/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from . import convert_booleans, convert_datetime, convert_durations
from . import (
convert_booleans,
convert_datetime,
convert_durations,
convert_fixed_point,
)
11 changes: 11 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.column cimport Column
from pylibcudf.types cimport DataType


# Parse a strings column into a fixed-point column of ``output_type``.
cpdef Column to_fixed_point(Column input, DataType output_type)

# Convert a fixed-point column into a strings column.
cpdef Column from_fixed_point(Column input)

# ``=*`` marks ``decimal_type`` as optional; its default value is supplied by
# the implementation in convert_fixed_point.pyx.
cpdef Column is_fixed_point(Column input, DataType decimal_type=*)
107 changes: 107 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.column cimport Column
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.strings.convert cimport (
convert_fixed_point as cpp_fixed_point,
)
from pylibcudf.types cimport DataType, type_id


cpdef Column to_fixed_point(Column input, DataType output_type):
    """
    Parse the decimal values held in a strings column into a new
    fixed-point column.

    For details, see :cpp:func:`cudf::strings::to_fixed_point`

    Parameters
    ----------
    input : Column
        Strings column to parse.
    output_type : DataType
        Fixed-point type (including its scale) of the column to return.

    Returns
    -------
    Column
        New column of ``output_type``.
    """
    cdef unique_ptr[column] c_parsed

    # The libcudf call is made with the GIL released.
    with nogil:
        c_parsed = move(
            cpp_fixed_point.to_fixed_point(input.view(), output_type.c_obj)
        )

    # Hand ownership of the libcudf column over to a pylibcudf Column.
    return Column.from_libcudf(move(c_parsed))

cpdef Column from_fixed_point(Column input):
    """
    Convert the values of a fixed-point column into a new strings
    column.

    For details, see :cpp:func:`cudf::strings::from_fixed_point`

    Parameters
    ----------
    input : Column
        Fixed-point column to convert.

    Returns
    -------
    Column
        New strings column.
    """
    cdef unique_ptr[column] c_strings

    # The libcudf call is made with the GIL released.
    with nogil:
        c_strings = move(cpp_fixed_point.from_fixed_point(input.view()))

    # Hand ownership of the libcudf column over to a pylibcudf Column.
    return Column.from_libcudf(move(c_strings))

cpdef Column is_fixed_point(Column input, DataType decimal_type=None):
    """
    Build a boolean column flagging the strings whose characters are
    all valid for conversion to fixed-point.

    For details, see :cpp:func:`cudf::strings::is_fixed_point`

    Parameters
    ----------
    input : Column
        Strings column to check.
    decimal_type : DataType, optional
        Fixed-point type (with scale) used only for checking overflow.
        When omitted or ``None``, a ``DECIMAL64`` type is used.

    Returns
    -------
    Column
        New column of boolean results, one per input string.
    """
    cdef unique_ptr[column] c_flags

    # Fall back to DECIMAL64 when the caller did not pick a type.
    if decimal_type is None:
        decimal_type = DataType(type_id.DECIMAL64)

    # The libcudf call is made with the GIL released.
    with nogil:
        c_flags = move(
            cpp_fixed_point.is_fixed_point(input.view(), decimal_type.c_obj)
        )

    # Hand ownership of the libcudf column over to a pylibcudf Column.
    return Column.from_libcudf(move(c_flags))
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
import decimal

import pyarrow as pa
import pylibcudf as plc
from utils import assert_column_eq


def test_to_fixed_point():
    """Strings parsed by pylibcudf match a pyarrow cast to decimal128(38, 2)."""
    decimal_type = pa.decimal128(38, 2)
    strings = pa.array(["123", "1.23", None])
    plc_strings = plc.interop.from_arrow(strings)
    plc_type = plc.interop.from_arrow(decimal_type)
    got = plc.strings.convert.convert_fixed_point.to_fixed_point(
        plc_strings, plc_type
    )
    # pyarrow's own string -> decimal cast serves as the reference result.
    want = strings.cast(decimal_type)
    assert_column_eq(got, want)


def test_from_fixed_point():
    """Decimal values (including a null) are rendered back as strings."""
    decimals = pa.array([decimal.Decimal("1.1"), None])
    plc_decimals = plc.interop.from_arrow(decimals)
    got = plc.strings.convert.convert_fixed_point.from_fixed_point(
        plc_decimals
    )
    want = pa.array(["1.1", None])
    assert_column_eq(got, want)


def test_is_fixed_point():
    """Valid, malformed, empty and null strings are flagged as expected."""
    strings = pa.array(["123", "1.23", "1.2.3", "", None])
    plc_strings = plc.interop.from_arrow(strings)
    # No decimal_type is passed, so the function's default type is exercised.
    got = plc.strings.convert.convert_fixed_point.is_fixed_point(plc_strings)
    want = pa.array([True, True, False, False, None])
    assert_column_eq(got, want)

0 comments on commit 119aa9d

Please sign in to comment.