From 98ff497164c431abc753c1384f076b1aeb817c45 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 30 Nov 2018 00:32:45 +0100 Subject: [PATCH 1/4] API: fix str-accessor on CategoricalIndex --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/strings.py | 9 +++++---- pandas/tests/test_strings.py | 4 +--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f888648a9363e..23efa7a09b9ac 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1254,6 +1254,7 @@ Categorical - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). - In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) +- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0b791f6f91aa3..18b6d247b58a6 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -15,7 +15,7 @@ from pandas.core.dtypes.common import ( ensure_object, is_bool_dtype, is_categorical_dtype, is_integer, is_list_like, is_object_dtype, is_re, is_scalar, is_string_like) -from pandas.core.dtypes.generic import ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.algorithms import take_1d @@ -931,7 +931,7 @@ def str_extractall(arr, pat, flags=0): if regex.groups == 0: raise ValueError("pattern contains no capture groups") - if isinstance(arr, ABCIndex): + if isinstance(arr, ABCIndexClass): arr = arr.to_series().reset_index(drop=True) names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) @@ -1854,7 +1854,7 @@ def __iter__(self): def _wrap_result(self, result, use_codes=True, name=None, expand=None, fill_value=np.nan): - from pandas.core.index import Index, MultiIndex + from pandas import Index, Series, MultiIndex # for category, we do the stuff on the categories, so blow it up # to the full series again @@ -1862,7 +1862,8 @@ def _wrap_result(self, result, use_codes=True, # so make it possible to skip this step as the method already did this # before the transformation... if use_codes and self._is_categorical: - result = take_1d(result, self._orig.cat.codes, + # if self._orig is a CategoricalIndex, there is no .cat-accessor + result = take_1d(result, Series(self._orig).cat.codes, fill_value=fill_value) if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 117984ce89743..958656869acbe 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -245,9 +245,6 @@ def test_api_per_method(self, box, dtype, and inferred_dtype in ['boolean', 'date', 'time']): pytest.xfail(reason='Inferring incorrectly because of NaNs; ' 'solved by GH 23167') - if box == Index and dtype == 'category': - pytest.xfail(reason='Broken methods on CategoricalIndex; ' - 'see GH 23556') t = box(values, dtype=dtype) # explicit dtype to avoid casting method = getattr(t.str, method_name) @@ -264,6 +261,7 @@ def test_api_per_method(self, box, dtype, + ['mixed', 'mixed-integer'] * mixed_allowed) if inferred_dtype in allowed_types: + # i.a. GH 23555, GH 23556 method(*args, **kwargs) # works! else: # GH 23011, GH 23163 From 2ea6fc052fe93507611193bee76d951ee1925955 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 30 Nov 2018 01:50:04 +0100 Subject: [PATCH 2/4] Review (gfyoung) --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/tests/test_strings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 23efa7a09b9ac..a2b2130b326d3 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1254,7 +1254,7 @@ Categorical - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). - In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) -- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) +- Bug in many methods of the ``.str``-accessor, which always failed on `CategoricalIndex` (:issue:`23555`, :issue:`23556`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 958656869acbe..c9124eb2c2215 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -261,7 +261,7 @@ def test_api_per_method(self, box, dtype, + ['mixed', 'mixed-integer'] * mixed_allowed) if inferred_dtype in allowed_types: - # i.a. GH 23555, GH 23556 + # inter alia GH 23555, GH 23556 method(*args, **kwargs) # works! else: # GH 23011, GH 23163 From 1ee5dea69e709df7405105ed9ddde3ac2f5b8de5 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 2 Dec 2018 19:26:36 +0100 Subject: [PATCH 3/4] Review (jreback) --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/strings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index e4d5bc3681065..8f1a3c6bac25b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1256,7 +1256,7 @@ Categorical - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). - In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) -- Bug in many methods of the ``.str``-accessor, which always failed on `CategoricalIndex` (:issue:`23555`, :issue:`23556`) +- Bug in many methods of the ``.str``-accessor, which always failed on ``CategoricalIndex`` (:issue:`23555`, :issue:`23556`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 18b6d247b58a6..fd76293b4a0ca 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1863,7 +1863,7 @@ def _wrap_result(self, result, use_codes=True, # before the transformation... if use_codes and self._is_categorical: # if self._orig is a CategoricalIndex, there is no .cat-accessor - result = take_1d(result, Series(self._orig).cat.codes, + result = take_1d(result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value) if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'): From 968e1880ceb96a4f47da94887a315c0f8c3e5513 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 2 Dec 2018 21:57:53 +0100 Subject: [PATCH 4/4] Review (jreback) --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/tests/test_strings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 8f1a3c6bac25b..4036b3bd3e00e 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1256,7 +1256,7 @@ Categorical - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). - In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) -- Bug in many methods of the ``.str``-accessor, which always failed on ``CategoricalIndex`` (:issue:`23555`, :issue:`23556`) +- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index c9124eb2c2215..f3a7753b8bd48 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -261,7 +261,7 @@ def test_api_per_method(self, box, dtype, + ['mixed', 'mixed-integer'] * mixed_allowed) if inferred_dtype in allowed_types: - # inter alia GH 23555, GH 23556 + # xref GH 23555, GH 23556 method(*args, **kwargs) # works! else: # GH 23011, GH 23163