Skip to content

Commit

Permalink
Ensure size is always passed to NumericalColumn (#16576)
Browse files Browse the repository at this point in the history
#16457 requires `NumericalColumn` to be constructed with `size`. It appears another PR was merged after this PR was created, so there are currently a few call sites where `size` isn't passed in.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16576
  • Loading branch information
mroeschke authored Aug 16, 2024
1 parent 1c63e1e commit e690d9d
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 36 deletions.
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,7 @@ def _make_categorical_like(result, column):
if isinstance(column, cudf.core.column.CategoricalColumn):
result = cudf.core.column.build_categorical_column(
categories=column.categories,
codes=cudf.core.column.NumericalColumn(
result.base_data, dtype=result.dtype
),
codes=result,
mask=result.base_mask,
size=result.size,
offset=result.offset,
Expand Down
37 changes: 8 additions & 29 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,10 +659,7 @@ def slice(self, start: int, stop: int, stride: int | None = None) -> Self:
Self,
cudf.core.column.build_categorical_column(
categories=self.categories,
codes=cudf.core.column.NumericalColumn(
codes.base_data, # type: ignore[arg-type]
dtype=codes.dtype,
),
codes=codes,
mask=codes.base_mask,
ordered=self.ordered,
size=codes.size,
Expand Down Expand Up @@ -734,10 +731,7 @@ def sort_values(
codes = self.codes.sort_values(ascending, na_position)
col = column.build_categorical_column(
categories=self.dtype.categories._values,
codes=cudf.core.column.NumericalColumn(
codes.base_data, # type: ignore[arg-type]
dtype=codes.dtype,
),
codes=codes,
mask=codes.base_mask,
size=codes.size,
ordered=self.dtype.ordered,
Expand Down Expand Up @@ -845,10 +839,7 @@ def unique(self) -> CategoricalColumn:
codes = self.codes.unique()
return column.build_categorical_column(
categories=self.categories,
codes=cudf.core.column.NumericalColumn(
codes.base_data, # type: ignore[arg-type]
dtype=codes.dtype,
),
codes=codes,
mask=codes.base_mask,
offset=codes.offset,
size=codes.size,
Expand Down Expand Up @@ -986,9 +977,7 @@ def find_and_replace(

result = column.build_categorical_column(
categories=new_cats["cats"],
codes=cudf.core.column.NumericalColumn(
output.base_data, dtype=output.dtype
),
codes=output,
mask=output.base_mask,
offset=output.offset,
size=output.size,
Expand Down Expand Up @@ -1184,10 +1173,7 @@ def _concat(

return column.build_categorical_column(
categories=column.as_column(cats),
codes=cudf.core.column.NumericalColumn(
codes_col.base_data, # type: ignore[arg-type]
dtype=codes_col.dtype,
),
codes=codes_col,
mask=codes_col.base_mask,
size=codes_col.size,
offset=codes_col.offset,
Expand All @@ -1199,10 +1185,7 @@ def _with_type_metadata(
if isinstance(dtype, CategoricalDtype):
return column.build_categorical_column(
categories=dtype.categories._values,
codes=cudf.core.column.NumericalColumn(
self.codes.base_data, # type: ignore[arg-type]
dtype=self.codes.dtype,
),
codes=self.codes,
mask=self.codes.base_mask,
ordered=dtype.ordered,
size=self.codes.size,
Expand Down Expand Up @@ -1345,9 +1328,7 @@ def _set_categories(
Self,
column.build_categorical_column(
categories=new_cats,
codes=cudf.core.column.NumericalColumn(
new_codes.base_data, dtype=new_codes.dtype
),
codes=new_codes,
mask=new_codes.base_mask,
size=new_codes.size,
offset=new_codes.offset,
Expand Down Expand Up @@ -1478,9 +1459,7 @@ def pandas_categorical_as_column(

return column.build_categorical_column(
categories=categorical.categories,
codes=cudf.core.column.NumericalColumn(
codes.base_data, dtype=codes.dtype
),
codes=codes,
size=codes.size,
mask=mask,
ordered=categorical.ordered,
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1513,6 +1513,7 @@ def column_empty(
* cudf.dtype(libcudf.types.size_type_dtype).itemsize
)
),
size=None,
dtype=libcudf.types.size_type_dtype,
),
)
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
categories=dtype.categories._values,
codes=cudf.core.column.NumericalColumn(
self.base_data, # type: ignore[arg-type]
self.size,
dtype=self.dtype,
),
mask=self.base_mask,
Expand Down
5 changes: 1 addition & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
from cudf.core.column import (
CategoricalColumn,
ColumnBase,
NumericalColumn,
StructColumn,
as_column,
build_categorical_column,
Expand Down Expand Up @@ -8541,9 +8540,7 @@ def _reassign_categories(categories, cols, col_idxs):
if idx in categories:
cols[name] = build_categorical_column(
categories=categories[idx],
codes=NumericalColumn(
cols[name].base_data, dtype=cols[name].dtype
),
codes=cols[name],
mask=cols[name].base_mask,
offset=cols[name].offset,
size=cols[name].size,
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2501,6 +2501,7 @@ def _get_dt_field(self, field: str) -> Index:
out_column = self._column.get_dt_field(field)
out_column = NumericalColumn(
data=out_column.base_data,
size=out_column.size,
dtype=out_column.dtype,
mask=out_column.base_mask,
offset=out_column.offset,
Expand Down

0 comments on commit e690d9d

Please sign in to comment.