Skip to content

Commit ad3ae65

Browse files
Deprecate is_categorical_dtype (#14274)
This PR deprecates the public `is_categorical_dtype` to match pandas-dev/pandas#52527, which was introduced in `pandas-2.x`. Because this utility is still needed throughout our code base, the PR keeps it as the internal helper `_is_categorical_dtype` and switches internal callers to it. Test results with this PR: ``` = 23835 failed, 5698 passed, 1613 skipped, 288 xfailed, 423 errors in 1976.84s (0:32:56) = ``` Test results on `pandas_2.0_feature_branch` (for comparison): ``` = 24297 failed, 5115 passed, 1613 skipped, 288 xfailed, 480 errors in 1980.46s (0:33:00) = ```
1 parent fc6a30f commit ad3ae65

27 files changed

+134
-112
lines changed

python/cudf/cudf/_fuzz_testing/csv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
22

33
import logging
44
import random
@@ -99,7 +99,7 @@ def set_rand_params(self, params):
9999
if dtype_val is not None:
100100
dtype_val = {
101101
col_name: "category"
102-
if cudf.utils.dtypes.is_categorical_dtype(dtype)
102+
if cudf.utils.dtypes._is_categorical_dtype(dtype)
103103
else pandas_dtypes_to_np_dtypes[dtype]
104104
for col_name, dtype in dtype_val.items()
105105
}

python/cudf/cudf/_fuzz_testing/json.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
22

33
import logging
44
import random
@@ -27,7 +27,7 @@ def _get_dtype_param_value(dtype_val):
2727
if dtype_val is not None and isinstance(dtype_val, abc.Mapping):
2828
processed_dtypes = {}
2929
for col_name, dtype in dtype_val.items():
30-
if cudf.utils.dtypes.is_categorical_dtype(dtype):
30+
if cudf.utils.dtypes._is_categorical_dtype(dtype):
3131
processed_dtypes[col_name] = "category"
3232
else:
3333
processed_dtypes[col_name] = str(

python/cudf/cudf/_lib/column.pyx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import rmm
1111
import cudf
1212
import cudf._lib as libcudf
1313
from cudf._lib import pylibcudf
14-
from cudf.api.types import is_categorical_dtype, is_datetime64tz_dtype
14+
from cudf.api.types import _is_categorical_dtype, is_datetime64tz_dtype
1515
from cudf.core.buffer import (
1616
Buffer,
1717
ExposureTrackedBuffer,
@@ -331,7 +331,7 @@ cdef class Column:
331331
)
332332

333333
cdef mutable_column_view mutable_view(self) except *:
334-
if is_categorical_dtype(self.dtype):
334+
if _is_categorical_dtype(self.dtype):
335335
col = self.base_children[0]
336336
data_dtype = col.dtype
337337
elif is_datetime64tz_dtype(self.dtype):
@@ -394,7 +394,7 @@ cdef class Column:
394394
return self._view(c_null_count)
395395

396396
cdef column_view _view(self, libcudf_types.size_type null_count) except *:
397-
if is_categorical_dtype(self.dtype):
397+
if _is_categorical_dtype(self.dtype):
398398
col = self.base_children[0]
399399
data_dtype = col.dtype
400400
elif is_datetime64tz_dtype(self.dtype):
@@ -469,7 +469,7 @@ cdef class Column:
469469
# categoricals because cudf supports ordered and unordered categoricals
470470
# while libcudf supports only unordered categoricals (see
471471
# https://github.com/rapidsai/cudf/pull/8567).
472-
if is_categorical_dtype(self.dtype):
472+
if _is_categorical_dtype(self.dtype):
473473
col = self.base_children[0]
474474
else:
475475
col = self
@@ -635,7 +635,7 @@ cdef class Column:
635635
"""
636636
column_owner = isinstance(owner, Column)
637637
mask_owner = owner
638-
if column_owner and is_categorical_dtype(owner.dtype):
638+
if column_owner and _is_categorical_dtype(owner.dtype):
639639
owner = owner.base_children[0]
640640

641641
size = cv.size()

python/cudf/cudf/_lib/csv.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -434,19 +434,19 @@ def read_csv(
434434
if dtype is not None:
435435
if isinstance(dtype, abc.Mapping):
436436
for k, v in dtype.items():
437-
if cudf.api.types.is_categorical_dtype(v):
437+
if cudf.api.types._is_categorical_dtype(v):
438438
df._data[str(k)] = df._data[str(k)].astype(v)
439439
elif (
440440
cudf.api.types.is_scalar(dtype) or
441441
isinstance(dtype, (
442442
np.dtype, pd.api.extensions.ExtensionDtype, type
443443
))
444444
):
445-
if cudf.api.types.is_categorical_dtype(dtype):
445+
if cudf.api.types._is_categorical_dtype(dtype):
446446
df = df.astype(dtype)
447447
elif isinstance(dtype, abc.Collection):
448448
for index, col_dtype in enumerate(dtype):
449-
if cudf.api.types.is_categorical_dtype(col_dtype):
449+
if cudf.api.types._is_categorical_dtype(col_dtype):
450450
col_name = df._data.names[index]
451451
df._data[col_name] = df._data[col_name].astype(col_dtype)
452452

@@ -547,7 +547,7 @@ cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *:
547547
# TODO: Remove this work-around Dictionary types
548548
# in libcudf are fully mapped to categorical columns:
549549
# https://github.com/rapidsai/cudf/issues/3960
550-
if cudf.api.types.is_categorical_dtype(dtype):
550+
if cudf.api.types._is_categorical_dtype(dtype):
551551
if isinstance(dtype, str):
552552
dtype = "str"
553553
else:

python/cudf/cudf/_lib/groupby.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from pandas.core.groupby.groupby import DataError
44

55
from cudf.api.types import (
6-
is_categorical_dtype,
6+
_is_categorical_dtype,
77
is_decimal_dtype,
88
is_interval_dtype,
99
is_list_dtype,
@@ -189,7 +189,7 @@ cdef class GroupBy:
189189
valid_aggregations = (
190190
_LIST_AGGS if is_list_dtype(dtype)
191191
else _STRING_AGGS if is_string_dtype(dtype)
192-
else _CATEGORICAL_AGGS if is_categorical_dtype(dtype)
192+
else _CATEGORICAL_AGGS if _is_categorical_dtype(dtype)
193193
else _STRUCT_AGGS if is_struct_dtype(dtype)
194194
else _INTERVAL_AGGS if is_interval_dtype(dtype)
195195
else _DECIMAL_AGGS if is_decimal_dtype(dtype)
@@ -260,7 +260,7 @@ cdef class GroupBy:
260260
valid_aggregations = (
261261
_LIST_AGGS if is_list_dtype(dtype)
262262
else _STRING_AGGS if is_string_dtype(dtype)
263-
else _CATEGORICAL_AGGS if is_categorical_dtype(dtype)
263+
else _CATEGORICAL_AGGS if _is_categorical_dtype(dtype)
264264
else _STRUCT_AGGS if is_struct_dtype(dtype)
265265
else _INTERVAL_AGGS if is_interval_dtype(dtype)
266266
else _DECIMAL_AGGS if is_decimal_dtype(dtype)

python/cudf/cudf/_lib/json.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def write_json(
214214
cdef schema_element _get_cudf_schema_element_from_dtype(object dtype) except *:
215215
cdef schema_element s_element
216216
cdef data_type lib_type
217-
if cudf.api.types.is_categorical_dtype(dtype):
217+
if cudf.api.types._is_categorical_dtype(dtype):
218218
raise NotImplementedError(
219219
"CategoricalDtype as dtype is not yet "
220220
"supported in JSON reader"
@@ -237,7 +237,7 @@ cdef schema_element _get_cudf_schema_element_from_dtype(object dtype) except *:
237237

238238

239239
cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *:
240-
if cudf.api.types.is_categorical_dtype(dtype):
240+
if cudf.api.types._is_categorical_dtype(dtype):
241241
raise NotImplementedError(
242242
"CategoricalDtype as dtype is not yet "
243243
"supported in JSON reader"

python/cudf/cudf/_lib/utils.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ except ImportError:
2323
import json
2424

2525
from cudf.api.types import (
26-
is_categorical_dtype,
26+
_is_categorical_dtype,
2727
is_decimal_dtype,
2828
is_list_dtype,
2929
is_struct_dtype,
@@ -92,7 +92,7 @@ cpdef generate_pandas_metadata(table, index):
9292
# Columns
9393
for name, col in table._data.items():
9494
col_names.append(name)
95-
if is_categorical_dtype(col):
95+
if _is_categorical_dtype(col):
9696
raise ValueError(
9797
"'category' column dtypes are currently not "
9898
+ "supported by the gpu accelerated parquet writer"
@@ -147,7 +147,7 @@ cpdef generate_pandas_metadata(table, index):
147147
level=level,
148148
column_names=col_names
149149
)
150-
if is_categorical_dtype(idx):
150+
if _is_categorical_dtype(idx):
151151
raise ValueError(
152152
"'category' column dtypes are currently not "
153153
+ "supported by the gpu accelerated parquet writer"

python/cudf/cudf/api/types.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from cudf.core.dtypes import ( # noqa: F401
1919
_BaseDtype,
2020
dtype,
21-
is_categorical_dtype,
21+
_is_categorical_dtype,
2222
is_decimal32_dtype,
2323
is_decimal64_dtype,
2424
is_decimal128_dtype,
@@ -112,7 +112,7 @@ def is_string_dtype(obj):
112112
or (
113113
pd.api.types.is_string_dtype(obj)
114114
# Reject all cudf extension types.
115-
and not is_categorical_dtype(obj)
115+
and not _is_categorical_dtype(obj)
116116
and not is_decimal_dtype(obj)
117117
and not is_list_dtype(obj)
118118
and not is_struct_dtype(obj)
@@ -486,6 +486,7 @@ def is_any_real_numeric_dtype(arr_or_dtype) -> bool:
486486
is_iterator = pd_types.is_iterator
487487
is_bool = pd_types.is_bool
488488
is_categorical = pd_types.is_categorical_dtype
489+
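# TODO: `pd_types.is_categorical_dtype` is deprecated in pandas 2.x; revisit the `is_categorical` alias above.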
489490
is_complex = pd_types.is_complex
490491
is_float = pd_types.is_float
491492
is_hashable = pd_types.is_hashable

python/cudf/cudf/core/_internals/where.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
import cudf
99
from cudf._typing import ScalarLike
1010
from cudf.api.types import (
11+
_is_categorical_dtype,
1112
_is_non_decimal_numeric_dtype,
12-
is_categorical_dtype,
1313
is_scalar,
1414
)
1515
from cudf.core.column import ColumnBase
@@ -45,7 +45,7 @@ def _check_and_cast_columns_with_other(
4545
) -> Tuple[ColumnBase, Union[ScalarLike, ColumnBase]]:
4646
# Returns type-casted `source_col` & `other` based on `inplace`.
4747
source_dtype = source_col.dtype
48-
if is_categorical_dtype(source_dtype):
48+
if _is_categorical_dtype(source_dtype):
4949
return _normalize_categorical(source_col, other)
5050

5151
other_is_scalar = is_scalar(other)

python/cudf/cudf/core/column/categorical.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from cudf import _lib as libcudf
1717
from cudf._lib.transform import bools_to_mask
1818
from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
19-
from cudf.api.types import is_categorical_dtype, is_interval_dtype
19+
from cudf.api.types import _is_categorical_dtype, is_interval_dtype
2020
from cudf.core.buffer import Buffer
2121
from cudf.core.column import column
2222
from cudf.core.column.methods import ColumnMethods
@@ -98,7 +98,7 @@ class CategoricalAccessor(ColumnMethods):
9898
_column: CategoricalColumn
9999

100100
def __init__(self, parent: SeriesOrSingleColumnIndex):
101-
if not is_categorical_dtype(parent.dtype):
101+
if not _is_categorical_dtype(parent.dtype):
102102
raise AttributeError(
103103
"Can only use .cat accessor with a 'category' dtype"
104104
)

python/cudf/cudf/core/column/column.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@
4949
from cudf._lib.types import size_type_dtype
5050
from cudf._typing import ColumnLike, Dtype, ScalarLike
5151
from cudf.api.types import (
52+
_is_categorical_dtype,
5253
_is_non_decimal_numeric_dtype,
5354
infer_dtype,
5455
is_bool_dtype,
55-
is_categorical_dtype,
5656
is_datetime64_dtype,
5757
is_datetime64tz_dtype,
5858
is_decimal32_dtype,
@@ -977,7 +977,7 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
977977
def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
978978
if self.dtype == dtype:
979979
return self
980-
if is_categorical_dtype(dtype):
980+
if _is_categorical_dtype(dtype):
981981
return self.as_categorical_column(dtype, **kwargs)
982982

983983
dtype = (
@@ -987,7 +987,7 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
987987
)
988988
if _is_non_decimal_numeric_dtype(dtype):
989989
return self.as_numerical_column(dtype, **kwargs)
990-
elif is_categorical_dtype(dtype):
990+
elif _is_categorical_dtype(dtype):
991991
return self.as_categorical_column(dtype, **kwargs)
992992
elif cudf.dtype(dtype).type in {
993993
np.str_,
@@ -1423,7 +1423,7 @@ def column_empty_like(
14231423

14241424
if (
14251425
hasattr(column, "dtype")
1426-
and is_categorical_dtype(column.dtype)
1426+
and _is_categorical_dtype(column.dtype)
14271427
and dtype == column.dtype
14281428
):
14291429
catcolumn = cast("cudf.core.column.CategoricalColumn", column)
@@ -1476,7 +1476,7 @@ def column_empty(
14761476
full(row_count + 1, 0, dtype=libcudf.types.size_type_dtype),
14771477
column_empty(row_count, dtype=dtype.element_type),
14781478
)
1479-
elif is_categorical_dtype(dtype):
1479+
elif _is_categorical_dtype(dtype):
14801480
data = None
14811481
children = (
14821482
build_column(
@@ -1553,7 +1553,7 @@ def build_column(
15531553
offset=offset,
15541554
null_count=null_count,
15551555
)
1556-
if is_categorical_dtype(dtype):
1556+
if _is_categorical_dtype(dtype):
15571557
if not len(children) == 1:
15581558
raise ValueError(
15591559
"Must specify exactly one child column for CategoricalColumn"
@@ -2037,7 +2037,7 @@ def as_column(
20372037
f"{arbitrary.dtype} is not supported. Convert first to "
20382038
f"{arbitrary.dtype.subtype}."
20392039
)
2040-
if is_categorical_dtype(arbitrary.dtype):
2040+
if _is_categorical_dtype(arbitrary.dtype):
20412041
if isinstance(
20422042
arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype
20432043
):
@@ -2219,7 +2219,7 @@ def as_column(
22192219
data = data.astype(cudf.dtype(dtype))
22202220

22212221
elif isinstance(arbitrary, NumpyExtensionArray):
2222-
if is_categorical_dtype(arbitrary.dtype):
2222+
if _is_categorical_dtype(arbitrary.dtype):
22232223
arb_dtype = arbitrary.dtype
22242224
else:
22252225
if arbitrary.dtype == pd.StringDtype():
@@ -2347,7 +2347,9 @@ def as_column(
23472347
np_type = None
23482348
try:
23492349
if dtype is not None:
2350-
if is_categorical_dtype(dtype) or is_interval_dtype(dtype):
2350+
if _is_categorical_dtype(dtype) or is_interval_dtype(
2351+
dtype
2352+
):
23512353
raise TypeError
23522354
if is_datetime64tz_dtype(dtype):
23532355
raise NotImplementedError(
@@ -2491,7 +2493,7 @@ def as_column(
24912493
except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError) as e:
24922494
if isinstance(e, MixedTypeError):
24932495
raise TypeError(str(e))
2494-
if is_categorical_dtype(dtype):
2496+
if _is_categorical_dtype(dtype):
24952497
sr = pd.Series(arbitrary, dtype="category")
24962498
data = as_column(sr, nan_as_null=nan_as_null, dtype=dtype)
24972499
elif np_type == np.str_:
@@ -2774,7 +2776,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
27742776
# ColumnBase._concat so that all subclasses can override necessary
27752777
# behavior. However, at the moment it's not clear what that API should look
27762778
# like, so CategoricalColumn simply implements a minimal working API.
2777-
if all(is_categorical_dtype(o.dtype) for o in objs):
2779+
if all(_is_categorical_dtype(o.dtype) for o in objs):
27782780
return cudf.core.column.categorical.CategoricalColumn._concat(
27792781
cast(
27802782
MutableSequence[

python/cudf/cudf/core/column/interval.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pyarrow as pa
66

77
import cudf
8-
from cudf.api.types import is_categorical_dtype, is_interval_dtype
8+
from cudf.api.types import _is_categorical_dtype, is_interval_dtype
99
from cudf.core.column import StructColumn
1010
from cudf.core.dtypes import IntervalDtype
1111

@@ -102,7 +102,7 @@ def copy(self, deep=True):
102102

103103
def as_interval_column(self, dtype, **kwargs):
104104
if is_interval_dtype(dtype):
105-
if is_categorical_dtype(self):
105+
if _is_categorical_dtype(self):
106106
new_struct = self._get_decategorized_column()
107107
return IntervalColumn.from_struct_column(new_struct)
108108
if is_interval_dtype(dtype):

0 commit comments

Comments
 (0)