From 9958804362639460f23c9bab81d66e49d7336a88 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 2 Jan 2025 18:01:31 -0800 Subject: [PATCH 1/5] convert all nulls to nans --- python/cudf/cudf/core/column/column.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 31efe267c96..1e51a205441 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2405,6 +2405,7 @@ def as_column( and arbitrary.null_count > 0 ): arbitrary = arbitrary.cast(pa.float64()) + arbitrary = pc.fill_nulls(arbitrary, np.nan) if ( cudf.get_option("default_integer_bitwidth") and pa.types.is_integer(arbitrary.type) From a0b9d81ebc2c1c6d407460e14e2f8b18815f427c Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 2 Jan 2025 18:04:50 -0800 Subject: [PATCH 2/5] convert all nulls to nans --- python/cudf/cudf/core/column/column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 1e51a205441..735ea4e992b 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. from __future__ import annotations From e947aa76c79e82539c84b51e1699250517a8cde1 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 3 Jan 2025 06:01:58 -0800 Subject: [PATCH 3/5] fix api call --- python/cudf/cudf/core/column/column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 735ea4e992b..31aa2bb8212 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2405,7 +2405,7 @@ def as_column( and arbitrary.null_count > 0 ): arbitrary = arbitrary.cast(pa.float64()) - arbitrary = pc.fill_nulls(arbitrary, np.nan) + arbitrary = pc.fill_null(arbitrary, np.nan) if ( cudf.get_option("default_integer_bitwidth") and pa.types.is_integer(arbitrary.type) From 20d2a7f3d1ab09dd3d2e70ef9c1115376ee929c4 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 9 Jan 2025 09:10:10 -0800 Subject: [PATCH 4/5] add test --- python/cudf/cudf/core/column/column.py | 3 +++ python/cudf/cudf/tests/test_series.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index d6aa5a4356f..946649ac6aa 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2419,6 +2419,9 @@ def as_column( and pa.types.is_integer(arbitrary.type) and arbitrary.null_count > 0 ): + # TODO: Need to re-visit this cast and fill_null + # calls while addressing the following issue: + # https://github.com/rapidsai/cudf/issues/14149 arbitrary = arbitrary.cast(pa.float64()) arbitrary = pc.fill_null(arbitrary, np.nan) if ( diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index f8697c5c6b8..557a206bfce 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. import datetime import decimal import hashlib @@ -3003,3 +3003,12 @@ def test_dtype_dtypes_equal(): ser = cudf.Series([0]) assert ser.dtype is ser.dtypes assert ser.dtypes is ser.to_pandas().dtypes + + +def test_series_ensure_float_dtype(): + with cudf.option_context("mode.pandas_compatible", True): + ser = cudf.Series([1, 2, np.nan, 10, None]) + pser = pd.Series([1, 2, np.nan, 10, None]) + + assert pser.dtype == ser.dtype + assert_eq(ser, pser) From 7748ebf445be03d538b393c10a7a7fccf21cd48d Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 13 Jan 2025 12:25:17 -0600 Subject: [PATCH 5/5] Update python/cudf/cudf/tests/test_series.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- python/cudf/cudf/tests/test_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 557a206bfce..891c0ede9a4 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -3005,7 +3005,7 @@ def test_dtype_dtypes_equal(): assert ser.dtypes is ser.to_pandas().dtypes -def test_series_ensure_float_dtype(): +def test_null_like_to_nan_pandas_compat(): with cudf.option_context("mode.pandas_compatible", True): ser = cudf.Series([1, 2, np.nan, 10, None]) pser = pd.Series([1, 2, np.nan, 10, None])