diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f2cb6a3389a6d..cf2945d8544a5 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1256,6 +1256,7 @@ Categorical - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). - In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) +- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0b791f6f91aa3..fd76293b4a0ca 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -15,7 +15,7 @@ from pandas.core.dtypes.common import ( ensure_object, is_bool_dtype, is_categorical_dtype, is_integer, is_list_like, is_object_dtype, is_re, is_scalar, is_string_like) -from pandas.core.dtypes.generic import ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.algorithms import take_1d @@ -931,7 +931,7 @@ def str_extractall(arr, pat, flags=0): if regex.groups == 0: raise ValueError("pattern contains no capture groups") - if isinstance(arr, ABCIndex): + if isinstance(arr, ABCIndexClass): arr = arr.to_series().reset_index(drop=True) names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) @@ -1854,7 +1854,7 @@ def __iter__(self): def _wrap_result(self, result, use_codes=True, name=None, expand=None, fill_value=np.nan): - from pandas.core.index import Index, MultiIndex + from pandas import Index, Series, MultiIndex # for category, we do the stuff on the categories, so blow it up # to the full series again @@ -1862,7 +1862,8 @@ def _wrap_result(self, result, use_codes=True, # so make it possible to skip this step as the method already did this # before the transformation... if use_codes and self._is_categorical: - result = take_1d(result, self._orig.cat.codes, + # if self._orig is a CategoricalIndex, there is no .cat-accessor + result = take_1d(result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value) if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 117984ce89743..f3a7753b8bd48 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -245,9 +245,6 @@ def test_api_per_method(self, box, dtype, and inferred_dtype in ['boolean', 'date', 'time']): pytest.xfail(reason='Inferring incorrectly because of NaNs; ' 'solved by GH 23167') - if box == Index and dtype == 'category': - pytest.xfail(reason='Broken methods on CategoricalIndex; ' - 'see GH 23556') t = box(values, dtype=dtype) # explicit dtype to avoid casting method = getattr(t.str, method_name) @@ -264,6 +261,7 @@ def test_api_per_method(self, box, dtype, + ['mixed', 'mixed-integer'] * mixed_allowed) if inferred_dtype in allowed_types: + # xref GH 23555, GH 23556 method(*args, **kwargs) # works! else: # GH 23011, GH 23163