Skip to content

Commit 3aed81c

Browse files
authored
Set correct missing value indicator in astype for categorical (#45012)
1 parent: 7148c03; commit: 3aed81c

File tree

3 files changed

+14 -8 lines changed

3 files changed

+14 -8 lines changed

pandas/core/arrays/categorical.py

+6 -6
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,6 @@
108108
)
109109
import pandas.core.common as com
110110
from pandas.core.construction import (
111-
ensure_wrapped_if_datetimelike,
112111
extract_array,
113112
sanitize_array,
114113
)
@@ -539,14 +538,15 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
539538

540539
else:
541540
# GH8628 (PERF): astype category codes instead of astyping array
542-
if is_datetime64_dtype(self.categories):
543-
new_cats = ensure_wrapped_if_datetimelike(self.categories._values)
544-
else:
545-
new_cats = np.asarray(self.categories)
541+
new_cats = self.categories._values
546542

547543
try:
548544
new_cats = new_cats.astype(dtype=dtype, copy=copy)
549-
fill_value = lib.item_from_zerodim(np.array(np.nan).astype(dtype))
545+
fill_value = self.categories._na_value
546+
if not is_valid_na_for_dtype(fill_value, dtype):
547+
fill_value = lib.item_from_zerodim(
548+
np.array(self.categories._na_value).astype(dtype)
549+
)
550550
except (
551551
TypeError, # downstream error msg for CategoricalIndex is misleading
552552
ValueError,

pandas/core/dtypes/missing.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,8 @@
5959

6060
nan_checker = np.isnan
6161
INF_AS_NA = False
62+
_dtype_object = np.dtype("object")
63+
_dtype_str = np.dtype(str)
6264

6365

6466
def isna(obj):
@@ -647,7 +649,11 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool:
647649
# Numeric
648650
return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64))
649651

650-
elif dtype == np.dtype("object"):
652+
elif dtype == _dtype_str:
653+
# numpy string dtypes to avoid float np.nan
654+
return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal, float))
655+
656+
elif dtype == _dtype_object:
651657
# This is needed for Categorical, but is kind of weird
652658
return True
653659

pandas/tests/arrays/categorical/test_dtypes.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,7 @@ def test_astype_object_datetime_categories(self):
182182
# GH#40754
183183
cat = Categorical(to_datetime(["2021-03-27", NaT]))
184184
result = cat.astype(object)
185-
expected = np.array([Timestamp("2021-03-27 00:00:00"), np.nan], dtype="object")
185+
expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
186186
tm.assert_numpy_array_equal(result, expected)
187187

188188
def test_astype_object_timestamp_categories(self):

0 commit comments

Comments
 (0)