Skip to content

Commit

Permalink
Add cudf::calendrical_month_sequence to pylibcudf (rapidsai#17277)
Browse files Browse the repository at this point in the history
Part of rapidsai#15162. Also adds tests for `pylibcudf.filling`.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: rapidsai#17277
  • Loading branch information
Matt711 authored Nov 8, 2024
1 parent 2e0d2d6 commit d295f17
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 15 deletions.
21 changes: 6 additions & 15 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,7 @@ import warnings

from cudf.core.buffer import acquire_spill_lock

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

cimport pylibcudf.libcudf.datetime as libcudf_datetime
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.filling cimport calendrical_month_sequence
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type
from pylibcudf.datetime import DatetimeComponent, RoundingFrequency

Expand Down Expand Up @@ -143,20 +137,17 @@ def is_leap_year(Column col):

@acquire_spill_lock()
def date_range(DeviceScalar start, size_type n, offset):
    """Build a column of ``n`` timestamps stepping by whole months.

    Parameters
    ----------
    start : DeviceScalar
        Scalar holding the first timestamp; its ``c_value`` is handed to
        pylibcudf.
    n : size_type
        Number of timestamps to generate.
    offset : object
        Object exposing a ``kwds`` mapping. Only the ``"years"`` and
        ``"months"`` entries are read (each defaulting to 0); any other
        entries are ignored here.
        NOTE(review): presumably a cudf ``DateOffset`` -- confirm against
        the caller.

    Returns
    -------
    Column
        The result of ``plc.filling.calendrical_month_sequence`` wrapped
        via ``Column.from_pylibcudf``.
    """
    # Collapse the offset into a single month count: one year is 12 months.
    cdef size_type months = (
        offset.kwds.get("years", 0) * 12
        + offset.kwds.get("months", 0)
    )

    return Column.from_pylibcudf(
        plc.filling.calendrical_month_sequence(
            n,
            start.c_value,
            months,
        )
    )


@acquire_spill_lock()
Expand Down
6 changes: 6 additions & 0 deletions python/pylibcudf/pylibcudf/filling.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,9 @@ cpdef Table repeat(
Table input_table,
ColumnOrSize count
)

# Declaration for calendrical_month_sequence, whose implementation lives in
# filling.pyx: takes a count ``n``, an initial ``Scalar`` and a month step,
# and returns a ``Column``.
cpdef Column calendrical_month_sequence(
size_type n,
Scalar init,
size_type months,
)
37 changes: 37 additions & 0 deletions python/pylibcudf/pylibcudf/filling.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ from pylibcudf.libcudf.filling cimport (
fill_in_place as cpp_fill_in_place,
repeat as cpp_repeat,
sequence as cpp_sequence,
calendrical_month_sequence as cpp_calendrical_month_sequence
)
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.types cimport size_type
Expand Down Expand Up @@ -164,3 +165,39 @@ cpdef Table repeat(
count
)
return Table.from_libcudf(move(result))


cpdef Column calendrical_month_sequence(
    size_type n,
    Scalar init,
    size_type months,
):
    """Generate ``n`` timestamps starting at ``init``, ``months`` apart.

    For details, see :cpp:func:`calendrical_month_sequence`.

    Parameters
    ----------
    n : size_type
        Number of timestamps to generate
    init : Scalar
        The initial timestamp
    months : size_type
        Months to increment

    Returns
    -------
    pylibcudf.Column
        Timestamps column with sequences of months
    """
    cdef unique_ptr[column] c_result

    # Run the libcudf call with the GIL released.
    with nogil:
        c_result = cpp_calendrical_month_sequence(
            n,
            dereference(init.c_obj),
            months
        )
    # Hand ownership of the libcudf column to the pylibcudf wrapper.
    return Column.from_libcudf(move(c_result))
91 changes: 91 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_filling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from datetime import datetime

import pyarrow as pa
import pytest
from utils import assert_column_eq, assert_table_eq

import pylibcudf as plc


@pytest.fixture
def pa_col():
    """Provide the pyarrow integer array ``[2, 3, 5, 7, 11]``."""
    values = [2, 3, 5, 7, 11]
    return pa.array(values)


@pytest.fixture
def pa_table():
    """Provide a one-column pyarrow table ``a`` holding 1, 2, 3."""
    column_a = pa.array([1, 2, 3])
    return pa.table([column_a], names=["a"])


def test_fill(pa_col):
    """Check ``fill`` with begin=1, end=3 and value 5 on the fixture column."""
    source = plc.interop.from_arrow(pa_col)
    fill_value = plc.interop.from_arrow(pa.scalar(5))

    filled = plc.filling.fill(source, 1, 3, fill_value)

    assert_column_eq(filled, pa.array([2, 5, 5, 7, 11]))


def test_fill_in_place(pa_col):
    """Check ``fill_in_place`` mutates the column passed to it."""
    target = plc.interop.from_arrow(pa_col)
    fill_value = plc.interop.from_arrow(pa.scalar(5))

    # No return value is used: the assertion inspects ``target`` itself.
    plc.filling.fill_in_place(target, 1, 3, fill_value)

    assert_column_eq(target, pa.array([2, 5, 5, 7, 11]))


def test_sequence():
    """Check ``sequence`` of length 5 starting at 10 with step 2."""
    start = plc.interop.from_arrow(pa.scalar(10))
    step = plc.interop.from_arrow(pa.scalar(2))

    got = plc.filling.sequence(5, start, step)

    assert_column_eq(got, pa.array([10, 12, 14, 16, 18]))


def test_repeat_with_count_int(pa_table):
    """Check ``repeat`` with an integer count of 2 duplicates every row."""
    got = plc.filling.repeat(plc.interop.from_arrow(pa_table), 2)

    expected_rows = [1, 1, 2, 2, 3, 3]
    assert_table_eq(pa.table([expected_rows], names=["a"]), got)


def test_repeat_with_count_column(pa_table):
    """Check ``repeat`` with per-row counts ``[1, 2, 3]``."""
    plc_table = plc.interop.from_arrow(pa_table)
    per_row_counts = plc.interop.from_arrow(pa.array([1, 2, 3]))

    got = plc.filling.repeat(plc_table, per_row_counts)

    # Row value v is expected v times: 1 once, 2 twice, 3 three times.
    expected_rows = [1, 2, 2, 3, 3, 3]
    assert_table_eq(pa.table([expected_rows], names=["a"]), got)


def test_calendrical_month_sequence():
    """Check five one-month steps from 2020-01-31 hit each month's last day."""
    ts_type = pa.timestamp("ms")
    start = plc.interop.from_arrow(
        pa.scalar(datetime(2020, 1, 31), type=ts_type)
    )

    got = plc.filling.calendrical_month_sequence(5, start, 1)

    # (month, day) pairs in 2020; February has 29 days in this leap year.
    month_days = [(1, 31), (2, 29), (3, 31), (4, 30), (5, 31)]
    expect = pa.array(
        [datetime(2020, month, day) for month, day in month_days],
        type=ts_type,
    )
    assert_column_eq(got, expect)

0 comments on commit d295f17

Please sign in to comment.