From 6e62b2611803d4a69ea50ac5c7dd17ceadd81762 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Fri, 24 Feb 2023 08:35:12 +0000 Subject: [PATCH 01/15] ENH: add na_action to Categorical.map --- doc/source/whatsnew/v2.1.0.rst | 3 +- pandas/core/arrays/categorical.py | 30 ++-- pandas/core/indexes/category.py | 5 +- pandas/tests/apply/test_invalid_arg.py | 9 +- pandas/tests/apply/test_series_apply.py | 29 +++- pandas/tests/arrays/categorical/test_map.py | 138 +++++++++++++++++++ pandas/tests/indexes/categorical/test_map.py | 59 +++++--- 7 files changed, 233 insertions(+), 40 deletions(-) create mode 100644 pandas/tests/arrays/categorical/test_map.py diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index bac567b537edc..97efc0f1ceb7d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -38,6 +38,7 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). +- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:``) .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: @@ -146,7 +147,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- +- Bug in :meth:`Series.map` , where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:``). 
- Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f8befdbc6ca9c..167f50271109e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1205,7 +1205,7 @@ def remove_unused_categories(self) -> Categorical: # ------------------------------------------------------------------ - def map(self, mapper, na_action=None): + def map(self, mapper, na_action: Literal["ignore"] | None = "ignore"): """ Map categories using an input mapping or function. @@ -1222,6 +1222,9 @@ def map(self, mapper, na_action=None): ---------- mapper : function, dict, or Series Mapping correspondence. + na_action : {None, 'ignore'}, default 'ignore' + If 'ignore', propagate NaN values, without passing them to the + mapping correspondence. Returns ------- @@ -1274,20 +1277,23 @@ def map(self, mapper, na_action=None): >>> cat.map({'a': 'first', 'b': 'second'}) Index(['first', 'second', nan], dtype='object') """ - if na_action is not None: - raise NotImplementedError + assert callable(mapper) or is_dict_like(mapper) new_categories = self.categories.map(mapper) - try: - return self.from_codes( - self._codes.copy(), categories=new_categories, ordered=self.ordered - ) - except ValueError: - # NA values are represented in self._codes with -1 - # np.take causes NA values to take final element in new_categories - if np.any(self._codes == -1): - new_categories = new_categories.insert(len(new_categories), np.nan) + + not_dictlike_and_no_nans = not (is_dict_like(mapper) and np.nan not in mapper) + + if na_action is None and not_dictlike_and_no_nans and np.any(self._codes == -1): + na_value = mapper(np.nan) if callable(mapper) else mapper[np.nan] + new_categories = new_categories.insert(len(new_categories), na_value) return np.take(new_categories, self._codes) + elif new_categories.is_unique and not new_categories.hasnans: + new_dtype = CategoricalDtype(new_categories, ordered=self.ordered) + return 
self.from_codes(self._codes.copy(), dtype=new_dtype) + + if np.any(self._codes == -1): + new_categories = new_categories.insert(len(new_categories), np.nan) + return np.take(new_categories, self._codes) __eq__ = _cat_compare_op(operator.eq) __ne__ = _cat_compare_op(operator.ne) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b740f58097509..19d19994bbcab 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -4,6 +4,7 @@ TYPE_CHECKING, Any, Hashable, + Literal, ) import numpy as np @@ -402,7 +403,7 @@ def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex: def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: return self.categories._is_comparable_dtype(dtype) - def map(self, mapper): + def map(self, mapper, na_action: Literal["ignore"] | None = None): """ Map values using input an input mapping or function. @@ -469,7 +470,7 @@ def map(self, mapper): >>> idx.map({'a': 'first', 'b': 'second'}) Index(['first', 'second', nan], dtype='object') """ - mapped = self._values.map(mapper) + mapped = self._values.map(mapper, na_action=na_action) return Index(mapped, name=self.name) def _concat(self, to_concat: list[Index], name: Hashable) -> Index: diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 0207391c3070a..a7c5a930ba863 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -15,7 +15,6 @@ from pandas.errors import SpecificationError from pandas import ( - Categorical, DataFrame, Series, date_range, @@ -76,11 +75,9 @@ def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action): s.map({1: 2}, na_action=input_na_action) -def test_map_categorical_na_action(): - values = Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) - s = Series(values, name="XX", index=list("abcdefg")) - with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): - s.map(lambda x: x, 
na_action="ignore") +def test_map_datetimetz_na_action(): + values = date_range("2011-01-01", "2011-01-02", freq="H").tz_localize("Asia/Tokyo") + s = Series(values, name="XX") @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index bd0167701d08b..8f54d8efca0af 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -748,22 +748,45 @@ def test_map_box(): tm.assert_series_equal(res, exp) -def test_map_categorical(): +@pytest.mark.parametrize("na_action", [None, "ignore"]) +def test_map_categorical(na_action): values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) s = Series(values, name="XX", index=list("abcdefg")) - result = s.map(lambda x: x.lower()) + result = s.map(lambda x: x.lower(), na_action=na_action) exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) exp = Series(exp_values, name="XX", index=list("abcdefg")) tm.assert_series_equal(result, exp) tm.assert_categorical_equal(result.values, exp_values) - result = s.map(lambda x: "A") + result = s.map(lambda x: "A", na_action=na_action) exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) tm.assert_series_equal(result, exp) assert result.dtype == object +@pytest.mark.parametrize( + "na_action, expected", + ( + [None, Series(["A", "B", "nan"], name="XX")], + [ + "ignore", + Series( + ["A", "B", np.nan], + name="XX", + dtype=pd.CategoricalDtype(list("DCBA"), True), + ), + ], + ), +) +def test_map_categorical_na_action(na_action, expected): + dtype = pd.CategoricalDtype(list("DCBA"), ordered=True) + values = pd.Categorical(list("AB") + [np.nan], dtype=dtype) + s = Series(values, name="XX") + result = s.map(str, na_action=na_action) + tm.assert_series_equal(result, expected) + + def test_map_datetimetz(): values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( "Asia/Tokyo" diff --git 
a/pandas/tests/arrays/categorical/test_map.py b/pandas/tests/arrays/categorical/test_map.py new file mode 100644 index 0000000000000..73df8a7ff1a6b --- /dev/null +++ b/pandas/tests/arrays/categorical/test_map.py @@ -0,0 +1,138 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + Index, + Series, +) +import pandas._testing as tm + + +@pytest.fixture(params=[None, "ignore"]) +def na_action(request): + return request.param + + +class TestMap: + @pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], + ) + def test_map_str(self, data, categories, ordered, na_action): + # GH 31202 - override base class since we want to maintain categorical/ordered + cat = Categorical(data, categories=categories, ordered=ordered) + result = cat.map(str, na_action=na_action) + expected = Categorical( + map(str, data), categories=map(str, categories), ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + + def test_map(self, na_action): + cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) + result = cat.map(lambda x: x.lower(), na_action=na_action) + exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_categorical_equal(result, exp) + + cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) + result = cat.map(lambda x: x.lower(), na_action=na_action) + exp = Categorical(list("ababc"), categories=list("bac"), ordered=False) + tm.assert_categorical_equal(result, exp) + + # GH 12766: Return an index not an array + result = cat.map(lambda x: 1, na_action=na_action) + exp = Index(np.array([1] * 5, dtype=np.int64)) + tm.assert_index_equal(result, exp) + + # change categories dtype + cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = cat.map(f, 
na_action=na_action) + exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) + tm.assert_categorical_equal(result, exp) + + mapper = Series([10, 20, 30], index=["A", "B", "C"]) + result = cat.map(mapper, na_action=na_action) + tm.assert_categorical_equal(result, exp) + + result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action) + tm.assert_categorical_equal(result, exp) + + @pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Index([False, False, True])), + ([1, 2, np.nan], pd.isna, Index([False, False, True])), + ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + Categorical([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False] * 3), + Index([False, False, np.nan]), + ), + ), + ) + def test_map_with_nan_none(self, data, f, expected): # GH 24241 + values = Categorical(data) + result = values.map(f, na_action=None) + if isinstance(expected, Categorical): + tm.assert_categorical_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])), + ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), + ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + Categorical([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), + ), + ) + def test_map_with_nan_ignore(self, data, f, expected): # GH 24241 + values = Categorical(data) + result = values.map(f, na_action="ignore") + if data[1] == 1: + tm.assert_categorical_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + def 
test_map_with_dict_or_series(self, na_action): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cat = Categorical(orig_values) + + mapper = Series(new_values[:-1], index=orig_values[:-1]) + result = cat.map(mapper, na_action=na_action) + + # Order of categories in result can be different + expected = Categorical(new_values, categories=[3.0, 2, "one"]) + tm.assert_categorical_equal(result, expected) + + mapper = dict(zip(orig_values[:-1], new_values[:-1])) + result = cat.map(mapper, na_action=na_action) + # Order of categories in result can be different + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py index 261ee8daf5dec..e4bb3034be49c 100644 --- a/pandas/tests/indexes/categorical/test_map.py +++ b/pandas/tests/indexes/categorical/test_map.py @@ -78,25 +78,52 @@ def test_map_with_categorical_series(self): tm.assert_index_equal(a.map(c), exp) @pytest.mark.parametrize( - ("data", "f"), + ("data", "f", "expected"), ( - ([1, 1, np.nan], pd.isna), - ([1, 2, np.nan], pd.isna), - ([1, 1, np.nan], {1: False}), - ([1, 2, np.nan], {1: False, 2: False}), - ([1, 1, np.nan], Series([False, False])), - ([1, 2, np.nan], Series([False, False, False])), + ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), + ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + CategoricalIndex([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), ), ) - def test_map_with_nan(self, data, f): # GH 24241 - values = pd.Categorical(data) - result = values.map(f) - if data[1] == 1: - expected = pd.Categorical([False, False, np.nan]) - tm.assert_categorical_equal(result, expected) - else: - 
expected = Index([False, False, np.nan]) - tm.assert_index_equal(result, expected) + def test_map_with_nan_ignore(self, data, f, expected): # GH 24241 + values = CategoricalIndex(data) + result = values.map(f, na_action="ignore") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Index([False, False, True])), + ([1, 2, np.nan], pd.isna, Index([False, False, True])), + ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + CategoricalIndex([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), + ), + ) + def test_map_with_nan_none(self, data, f, expected): # GH 24241 + values = CategoricalIndex(data) + result = values.map(f, na_action=None) + tm.assert_index_equal(result, expected) def test_map_with_dict_or_series(self): orig_values = ["a", "B", 1, "a"] From 76722710219b8c0e54cd735fcd1001cbf6cf3034 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 26 Feb 2023 07:02:04 +0000 Subject: [PATCH 02/15] add GH numbers --- doc/source/whatsnew/v2.1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 97efc0f1ceb7d..fb1d618afa571 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -38,7 +38,7 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). 
-- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:``) +- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: @@ -147,7 +147,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Bug in :meth:`Series.map` , where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:``). +- Bug in :meth:`Series.map` , where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:`22527`). - Datetimelike From 1c5b3928f6893c07843d892081e5b3f4c3c82d4a Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 4 Mar 2023 21:54:43 +0000 Subject: [PATCH 03/15] pre-commit issues --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index fb1d618afa571..11dd9cbf1a68c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -39,6 +39,7 @@ Other enhancements - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) +- .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: From 5b070291e91e605159d9e3e9b2a30dd70dbd77bb Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 6 Mar 2023 17:41:32 +0000 Subject: [PATCH 04/15] map Categorical with Series --- pandas/tests/apply/test_series_apply.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 8f54d8efca0af..813ad6197f4e9 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -649,12 +649,15 @@ def test_map_defaultdict_ignore_na(): tm.assert_series_equal(result, expected) -def test_map_categorical_na_ignore(): +@pytest.mark.parametrize( + "na_action, expected", + [(None, Series([10.0, 42.0, np.nan])), ("ignore", Series([10, np.nan, np.nan]))], +) +def test_map_categorical_na_ignore(na_action, expected): # GH#47527 - values = pd.Categorical([1, np.nan, 2], categories=[10, 1]) + values = pd.Categorical([1, np.nan, 2], categories=[10, 1, 2]) ser = Series(values) - result = ser.map({1: 10, np.nan: 42}) - expected = Series([10, np.nan, np.nan]) + result = ser.map({1: 10, np.nan: 42}, na_action=na_action) tm.assert_series_equal(result, expected) From 85b4193453e7c4aca730ca9663957de37b5134a2 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 6 Mar 2023 20:03:01 +0000 Subject: [PATCH 05/15] REF: simplify .map --- .pre-commit-config.yaml | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index de36bf2d441c5..980e624bea892 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,27 +78,6 @@ repos: --linelength=88, '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' ] -- repo: https://github.com/pycqa/pylint - rev: v2.16.2 - hooks: - - id: pylint - stages: [manual] -- repo: https://github.com/pycqa/pylint - rev: v2.16.2 - hooks: - - id: pylint - alias: redefined-outer-name - 
name: Redefining name from outer scope - files: ^pandas/ - exclude: | - (?x) - ^pandas/tests # keep excluded - |/_testing/ # keep excluded - |^pandas/util/_test_decorators\.py # keep excluded - |^pandas/_version\.py # keep excluded - |^pandas/conftest\.py # keep excluded - args: [--disable=all, --enable=redefined-outer-name] - stages: [manual] - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: From 6dfdf878c5d723bff812091f1a74b19382b832fd Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 13 Mar 2023 20:14:28 +0000 Subject: [PATCH 06/15] pass test_map --- pandas/tests/extension/test_categorical.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 34a23315fd9fa..f331449489bcc 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -186,12 +186,8 @@ def test_combine_add(self, data_repeated): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data, na_action): - if na_action is not None: - with pytest.raises(NotImplementedError, match=""): - data.map(lambda x: x, na_action=na_action) - else: - result = data.map(lambda x: x, na_action=na_action) - self.assert_extension_array_equal(result, data) + result = data.map(lambda x: x, na_action=na_action) + self.assert_extension_array_equal(result, data) class TestCasting(base.BaseCastingTests): From ed6b1d7a7c9db98f9f5f0afadda7821f322f6ca1 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 14 Mar 2023 18:44:38 +0000 Subject: [PATCH 07/15] fix whatsnew --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 11dd9cbf1a68c..bd033c86c22a7 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -28,6 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ +- :meth:`Categorical.map` and 
:meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide ` (:issue:`48347`) - :meth:`MultiIndex.sort_values` now supports ``na_position`` (:issue:`51612`) - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`) From 2018aafc6d31e1c1554296e75cde85036dd85552 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 18 Mar 2023 08:49:05 +0000 Subject: [PATCH 08/15] cleanups --- .pre-commit-config.yaml | 21 ++ pandas/core/arrays/categorical.py | 1 + pandas/tests/arrays/categorical/test_map.py | 225 +++++++++-------- pandas/tests/indexes/categorical/test_map.py | 252 ++++++++++--------- 4 files changed, 261 insertions(+), 238 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 980e624bea892..de36bf2d441c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,6 +78,27 @@ repos: --linelength=88, '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' ] +- repo: https://github.com/pycqa/pylint + rev: v2.16.2 + hooks: + - id: pylint + stages: [manual] +- repo: https://github.com/pycqa/pylint + rev: v2.16.2 + hooks: + - id: pylint + alias: redefined-outer-name + name: Redefining name from outer scope + files: ^pandas/ + exclude: | + (?x) + ^pandas/tests # keep excluded + |/_testing/ # keep excluded + |^pandas/util/_test_decorators\.py # keep excluded + |^pandas/_version\.py # keep excluded + |^pandas/conftest\.py # keep excluded + args: [--disable=all, --enable=redefined-outer-name] + stages: [manual] - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 167f50271109e..8eb16caa026d9 100644 --- 
a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1293,6 +1293,7 @@ def map(self, mapper, na_action: Literal["ignore"] | None = "ignore"): if np.any(self._codes == -1): new_categories = new_categories.insert(len(new_categories), np.nan) + return np.take(new_categories, self._codes) __eq__ = _cat_compare_op(operator.eq) diff --git a/pandas/tests/arrays/categorical/test_map.py b/pandas/tests/arrays/categorical/test_map.py index 73df8a7ff1a6b..07a9648db177f 100644 --- a/pandas/tests/arrays/categorical/test_map.py +++ b/pandas/tests/arrays/categorical/test_map.py @@ -15,124 +15,123 @@ def na_action(request): return request.param -class TestMap: - @pytest.mark.parametrize( - "data, categories", - [ - (list("abcbca"), list("cab")), - (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), - ], - ids=["string", "interval"], +@pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], +) +def test_map_str(data, categories, ordered, na_action): + # GH 31202 - override base class since we want to maintain categorical/ordered + cat = Categorical(data, categories=categories, ordered=ordered) + result = cat.map(str, na_action=na_action) + expected = Categorical( + map(str, data), categories=map(str, categories), ordered=ordered ) - def test_map_str(self, data, categories, ordered, na_action): - # GH 31202 - override base class since we want to maintain categorical/ordered - cat = Categorical(data, categories=categories, ordered=ordered) - result = cat.map(str, na_action=na_action) - expected = Categorical( - map(str, data), categories=map(str, categories), ordered=ordered - ) + tm.assert_categorical_equal(result, expected) + +def test_map(na_action): + cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) + result = cat.map(lambda x: x.lower(), na_action=na_action) + exp = Categorical(list("ababc"), 
categories=list("cba"), ordered=True) + tm.assert_categorical_equal(result, exp) + + cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) + result = cat.map(lambda x: x.lower(), na_action=na_action) + exp = Categorical(list("ababc"), categories=list("bac"), ordered=False) + tm.assert_categorical_equal(result, exp) + + # GH 12766: Return an index not an array + result = cat.map(lambda x: 1, na_action=na_action) + exp = Index(np.array([1] * 5, dtype=np.int64)) + tm.assert_index_equal(result, exp) + + # change categories dtype + cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = cat.map(f, na_action=na_action) + exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) + tm.assert_categorical_equal(result, exp) + + mapper = Series([10, 20, 30], index=["A", "B", "C"]) + result = cat.map(mapper, na_action=na_action) + tm.assert_categorical_equal(result, exp) + + result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action) + tm.assert_categorical_equal(result, exp) + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Index([False, False, True])), + ([1, 2, np.nan], pd.isna, Index([False, False, True])), + ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + Categorical([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False] * 3), + Index([False, False, np.nan]), + ), + ), +) +def test_map_with_nan_none(data, f, expected): # GH 24241 + values = Categorical(data) + result = values.map(f, na_action=None) + if isinstance(expected, Categorical): tm.assert_categorical_equal(result, expected) - - def test_map(self, na_action): - cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) - result = cat.map(lambda x: x.lower(), na_action=na_action) 
- exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) - tm.assert_categorical_equal(result, exp) - - cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) - result = cat.map(lambda x: x.lower(), na_action=na_action) - exp = Categorical(list("ababc"), categories=list("bac"), ordered=False) - tm.assert_categorical_equal(result, exp) - - # GH 12766: Return an index not an array - result = cat.map(lambda x: 1, na_action=na_action) - exp = Index(np.array([1] * 5, dtype=np.int64)) - tm.assert_index_equal(result, exp) - - # change categories dtype - cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) - - def f(x): - return {"A": 10, "B": 20, "C": 30}.get(x) - - result = cat.map(f, na_action=na_action) - exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) - tm.assert_categorical_equal(result, exp) - - mapper = Series([10, 20, 30], index=["A", "B", "C"]) - result = cat.map(mapper, na_action=na_action) - tm.assert_categorical_equal(result, exp) - - result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action) - tm.assert_categorical_equal(result, exp) - - @pytest.mark.parametrize( - ("data", "f", "expected"), + else: + tm.assert_index_equal(result, expected) + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])), + ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), + ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), ( - ([1, 1, np.nan], pd.isna, Index([False, False, True])), - ([1, 2, np.nan], pd.isna, Index([False, False, True])), - ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), - ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), - ( - [1, 1, np.nan], - Series([False, False]), - Categorical([False, False, np.nan]), - ), - ( - [1, 2, np.nan], - Series([False] * 3), - Index([False, 
False, np.nan]), - ), + [1, 1, np.nan], + Series([False, False]), + Categorical([False, False, np.nan]), ), - ) - def test_map_with_nan_none(self, data, f, expected): # GH 24241 - values = Categorical(data) - result = values.map(f, na_action=None) - if isinstance(expected, Categorical): - tm.assert_categorical_equal(result, expected) - else: - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize( - ("data", "f", "expected"), ( - ([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])), - ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), - ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), - ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), - ( - [1, 1, np.nan], - Series([False, False]), - Categorical([False, False, np.nan]), - ), - ( - [1, 2, np.nan], - Series([False, False, False]), - Index([False, False, np.nan]), - ), + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), ), - ) - def test_map_with_nan_ignore(self, data, f, expected): # GH 24241 - values = Categorical(data) - result = values.map(f, na_action="ignore") - if data[1] == 1: - tm.assert_categorical_equal(result, expected) - else: - tm.assert_index_equal(result, expected) - - def test_map_with_dict_or_series(self, na_action): - orig_values = ["a", "B", 1, "a"] - new_values = ["one", 2, 3.0, "one"] - cat = Categorical(orig_values) - - mapper = Series(new_values[:-1], index=orig_values[:-1]) - result = cat.map(mapper, na_action=na_action) - - # Order of categories in result can be different - expected = Categorical(new_values, categories=[3.0, 2, "one"]) + ), +) +def test_map_with_nan_ignore(data, f, expected): # GH 24241 + values = Categorical(data) + result = values.map(f, na_action="ignore") + if data[1] == 1: tm.assert_categorical_equal(result, expected) + else: + tm.assert_index_equal(result, expected) - mapper = dict(zip(orig_values[:-1], new_values[:-1])) - result = cat.map(mapper, na_action=na_action) 
- # Order of categories in result can be different - tm.assert_categorical_equal(result, expected) +def test_map_with_dict_or_series(na_action): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cat = Categorical(orig_values) + + mapper = Series(new_values[:-1], index=orig_values[:-1]) + result = cat.map(mapper, na_action=na_action) + + # Order of categories in result can be different + expected = Categorical(new_values, categories=[3.0, 2, "one"]) + tm.assert_categorical_equal(result, expected) + + mapper = dict(zip(orig_values[:-1], new_values[:-1])) + result = cat.map(mapper, na_action=na_action) + # Order of categories in result can be different + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py index e4bb3034be49c..8e73ee553dc03 100644 --- a/pandas/tests/indexes/categorical/test_map.py +++ b/pandas/tests/indexes/categorical/test_map.py @@ -10,133 +10,135 @@ import pandas._testing as tm -class TestMap: - @pytest.mark.parametrize( - "data, categories", - [ - (list("abcbca"), list("cab")), - (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), - ], - ids=["string", "interval"], +@pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], +) +def test_map_str(data, categories, ordered): + # GH 31202 - override base class since we want to maintain categorical/ordered + index = CategoricalIndex(data, categories=categories, ordered=ordered) + result = index.map(str) + expected = CategoricalIndex( + map(str, data), categories=map(str, categories), ordered=ordered ) - def test_map_str(self, data, categories, ordered): - # GH 31202 - override base class since we want to maintain categorical/ordered - index = CategoricalIndex(data, categories=categories, ordered=ordered) - result = index.map(str) - expected = 
CategoricalIndex( - map(str, data), categories=map(str, categories), ordered=ordered - ) - tm.assert_index_equal(result, expected) - - def test_map(self): - ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) - result = ci.map(lambda x: x.lower()) - exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) - tm.assert_index_equal(result, exp) - - ci = CategoricalIndex( - list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" - ) - result = ci.map(lambda x: x.lower()) - exp = CategoricalIndex( - list("ababc"), categories=list("bac"), ordered=False, name="XXX" - ) - tm.assert_index_equal(result, exp) - - # GH 12766: Return an index not an array - tm.assert_index_equal( - ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") - ) - - # change categories dtype - ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) - - def f(x): - return {"A": 10, "B": 20, "C": 30}.get(x) - - result = ci.map(f) - exp = CategoricalIndex( - [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False - ) - tm.assert_index_equal(result, exp) - - result = ci.map(Series([10, 20, 30], index=["A", "B", "C"])) - tm.assert_index_equal(result, exp) - - result = ci.map({"A": 10, "B": 20, "C": 30}) - tm.assert_index_equal(result, exp) - - def test_map_with_categorical_series(self): - # GH 12756 - a = Index([1, 2, 3, 4]) - b = Series(["even", "odd", "even", "odd"], dtype="category") - c = Series(["even", "odd", "even", "odd"]) - - exp = CategoricalIndex(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(b), exp) - exp = Index(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(c), exp) - - @pytest.mark.parametrize( - ("data", "f", "expected"), - ( - ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])), - ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), - ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), - ([1, 2, np.nan], {1: False, 2: 
False}, Index([False, False, np.nan])), - ( - [1, 1, np.nan], - Series([False, False]), - CategoricalIndex([False, False, np.nan]), - ), - ( - [1, 2, np.nan], - Series([False, False, False]), - Index([False, False, np.nan]), - ), - ), + tm.assert_index_equal(result, expected) + +def test_map(): + ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_index_equal(result, exp) + + ci = CategoricalIndex( + list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" + ) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex( + list("ababc"), categories=list("bac"), ordered=False, name="XXX" + ) + tm.assert_index_equal(result, exp) + + # GH 12766: Return an index not an array + tm.assert_index_equal( + ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") ) - def test_map_with_nan_ignore(self, data, f, expected): # GH 24241 - values = CategoricalIndex(data) - result = values.map(f, na_action="ignore") - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( - ("data", "f", "expected"), + # change categories dtype + ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = ci.map(f) + exp = CategoricalIndex( + [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False + ) + tm.assert_index_equal(result, exp) + + result = ci.map(Series([10, 20, 30], index=["A", "B", "C"])) + tm.assert_index_equal(result, exp) + + result = ci.map({"A": 10, "B": 20, "C": 30}) + tm.assert_index_equal(result, exp) + + +def test_map_with_categorical_series(): + # GH 12756 + a = Index([1, 2, 3, 4]) + b = Series(["even", "odd", "even", "odd"], dtype="category") + c = Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = 
Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), + ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), ( - ([1, 1, np.nan], pd.isna, Index([False, False, True])), - ([1, 2, np.nan], pd.isna, Index([False, False, True])), - ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), - ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), - ( - [1, 1, np.nan], - Series([False, False]), - CategoricalIndex([False, False, np.nan]), - ), - ( - [1, 2, np.nan], - Series([False, False, False]), - Index([False, False, np.nan]), - ), + [1, 1, np.nan], + Series([False, False]), + CategoricalIndex([False, False, np.nan]), ), - ) - def test_map_with_nan_none(self, data, f, expected): # GH 24241 - values = CategoricalIndex(data) - result = values.map(f, na_action=None) - tm.assert_index_equal(result, expected) - - def test_map_with_dict_or_series(self): - orig_values = ["a", "B", 1, "a"] - new_values = ["one", 2, 3.0, "one"] - cur_index = CategoricalIndex(orig_values, name="XXX") - expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"]) - - mapper = Series(new_values[:-1], index=orig_values[:-1]) - result = cur_index.map(mapper) - # Order of categories in result can be different - tm.assert_index_equal(result, expected) - - mapper = dict(zip(orig_values[:-1], new_values[:-1])) - result = cur_index.map(mapper) - # Order of categories in result can be different - tm.assert_index_equal(result, expected) + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), + ), +) +def test_map_with_nan_ignore(data, f, expected): # GH 24241 + values = CategoricalIndex(data) + result = values.map(f, 
na_action="ignore") + tm.assert_index_equal(result, expected) + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Index([False, False, True])), + ([1, 2, np.nan], pd.isna, Index([False, False, True])), + ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + CategoricalIndex([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), + ), +) +def test_map_with_nan_none(data, f, expected): # GH 24241 + values = CategoricalIndex(data) + result = values.map(f, na_action=None) + tm.assert_index_equal(result, expected) + + +def test_map_with_dict_or_series(): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cur_index = CategoricalIndex(orig_values, name="XXX") + expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"]) + + mapper = Series(new_values[:-1], index=orig_values[:-1]) + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected) + + mapper = dict(zip(orig_values[:-1], new_values[:-1])) + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected) From 84beb35a0d251e652a503d475853448828acfb0c Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 18 Mar 2023 08:50:24 +0000 Subject: [PATCH 09/15] pre-commit --- pandas/tests/arrays/categorical/test_map.py | 4 ++++ pandas/tests/indexes/categorical/test_map.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_map.py b/pandas/tests/arrays/categorical/test_map.py index 07a9648db177f..f043aaab563f3 100644 --- a/pandas/tests/arrays/categorical/test_map.py +++ b/pandas/tests/arrays/categorical/test_map.py @@ -32,6 +32,7 @@ def test_map_str(data, 
categories, ordered, na_action): ) tm.assert_categorical_equal(result, expected) + def test_map(na_action): cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) result = cat.map(lambda x: x.lower(), na_action=na_action) @@ -65,6 +66,7 @@ def f(x): result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action) tm.assert_categorical_equal(result, exp) + @pytest.mark.parametrize( ("data", "f", "expected"), ( @@ -92,6 +94,7 @@ def test_map_with_nan_none(data, f, expected): # GH 24241 else: tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( ("data", "f", "expected"), ( @@ -119,6 +122,7 @@ def test_map_with_nan_ignore(data, f, expected): # GH 24241 else: tm.assert_index_equal(result, expected) + def test_map_with_dict_or_series(na_action): orig_values = ["a", "B", 1, "a"] new_values = ["one", 2, 3.0, "one"] diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py index 8e73ee553dc03..baf836594dfb5 100644 --- a/pandas/tests/indexes/categorical/test_map.py +++ b/pandas/tests/indexes/categorical/test_map.py @@ -27,6 +27,7 @@ def test_map_str(data, categories, ordered): ) tm.assert_index_equal(result, expected) + def test_map(): ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) result = ci.map(lambda x: x.lower()) @@ -54,9 +55,7 @@ def f(x): return {"A": 10, "B": 20, "C": 30}.get(x) result = ci.map(f) - exp = CategoricalIndex( - [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False - ) + exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) tm.assert_index_equal(result, exp) result = ci.map(Series([10, 20, 30], index=["A", "B", "C"])) @@ -102,6 +101,7 @@ def test_map_with_nan_ignore(data, f, expected): # GH 24241 result = values.map(f, na_action="ignore") tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( ("data", "f", "expected"), ( From 01b199dc55ecad5c0e2ff68aec451c3cbcbc8669 Mon Sep 17 00:00:00 2001 From: 
Terji Petersen Date: Sun, 19 Mar 2023 15:18:48 +0000 Subject: [PATCH 10/15] deprecate Categorical.map(na_action=ignore) --- doc/source/whatsnew/v2.1.0.rst | 4 +++- pandas/core/apply.py | 7 +++++- pandas/core/arrays/categorical.py | 22 ++++++++++++++++++- .../arrays/categorical/test_analytics.py | 6 ++--- pandas/tests/arrays/categorical/test_map.py | 13 +++++++++++ .../tests/arrays/categorical/test_subclass.py | 2 +- 6 files changed, 47 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index bd033c86c22a7..383f006499171 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -28,7 +28,9 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) +- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter. + :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. 
+ Also notice that :meth:`Series.map` has default ``na_action=None`` and calls to series with categorical data will now use ``na_action=None`` unless explicitly set otherwise (:issue:`44279`) - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide ` (:issue:`48347`) - :meth:`MultiIndex.sort_values` now supports ``na_position`` (:issue:`51612`) - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 3d3a7fa6f0f33..dda98685f1fbd 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -39,6 +39,7 @@ from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( + is_categorical_dtype, is_dict_like, is_extension_array_dtype, is_list_like, @@ -1082,7 +1083,11 @@ def apply_standard(self) -> DataFrame | Series: return f(obj) # row-wise access - mapped = obj._map_values(mapper=f, convert=self.convert_dtype) + # apply doesn't have a `na_action` keyword and for backward compat reasons + # we need to give `na_action="ignore"` for categorical data. + # TODO: remove the `na_action="ignore"` has been removed from Categorical. 
+ action = "ignore" if is_categorical_dtype(obj) else None + mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype) if len(mapped) and isinstance(mapped[0], ABCSeries): # GH#43986 Need to do list(mapped) in order to get treated as nested diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 8eb16caa026d9..7b97eb0cea5e9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1205,7 +1205,11 @@ def remove_unused_categories(self) -> Categorical: # ------------------------------------------------------------------ - def map(self, mapper, na_action: Literal["ignore"] | None = "ignore"): + def map( + self, + mapper, + na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default, + ): """ Map categories using an input mapping or function. @@ -1226,6 +1230,11 @@ def map(self, mapper, na_action: Literal["ignore"] | None = "ignore"): If 'ignore', propagate NaN values, without passing them to the mapping correspondence. + .. deprecated:: 2.1.0 + + The dault value of 'ignore' has been deprecated and will be changed to + None in the future. + Returns ------- pandas.Categorical or pandas.Index @@ -1277,6 +1286,17 @@ def map(self, mapper, na_action: Literal["ignore"] | None = "ignore"): >>> cat.map({'a': 'first', 'b': 'second'}) Index(['first', 'second', nan], dtype='object') """ + if na_action is lib.no_default: + warn( + "The default value of 'ignore' for the `na_action` parameter in " + "pandas.Categorical.map is deprecated and will be " + "changed to 'None' in a future version. 
Please set na_action to the " + "desired value to avoid seeing this warning", + FutureWarning, + stacklevel=find_stack_level(), + ) + na_action = "ignore" + assert callable(mapper) or is_dict_like(mapper) new_categories = self.categories.map(mapper) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 55d39cf84eb30..057005b30ae20 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -300,16 +300,16 @@ def test_memory_usage(self): def test_map(self): c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) - result = c.map(lambda x: x.lower()) + result = c.map(lambda x: x.lower(), na_action=None) exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) tm.assert_categorical_equal(result, exp) c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False) - result = c.map(lambda x: x.lower()) + result = c.map(lambda x: x.lower(), na_action=None) exp = Categorical(list("ababc"), categories=list("abc"), ordered=False) tm.assert_categorical_equal(result, exp) - result = c.map(lambda x: 1) + result = c.map(lambda x: 1, na_action=None) # GH 12766: Return an index not an array tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) diff --git a/pandas/tests/arrays/categorical/test_map.py b/pandas/tests/arrays/categorical/test_map.py index f043aaab563f3..3d41b7cc7094d 100644 --- a/pandas/tests/arrays/categorical/test_map.py +++ b/pandas/tests/arrays/categorical/test_map.py @@ -139,3 +139,16 @@ def test_map_with_dict_or_series(na_action): result = cat.map(mapper, na_action=na_action) # Order of categories in result can be different tm.assert_categorical_equal(result, expected) + + +def test_map_na_action_no_default_deprecated(): + # GH51645 + cat = Categorical(["a", "b", "c"]) + msg = ( + "The default value of 'ignore' for the `na_action` parameter in " + "pandas.Categorical.map is deprecated 
and will be " + "changed to 'None' in a future version. Please set na_action to the " + "desired value to avoid seeing this warning" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + cat.map(lambda x: x) diff --git a/pandas/tests/arrays/categorical/test_subclass.py b/pandas/tests/arrays/categorical/test_subclass.py index b80d0ff41aba6..48325395faad8 100644 --- a/pandas/tests/arrays/categorical/test_subclass.py +++ b/pandas/tests/arrays/categorical/test_subclass.py @@ -16,7 +16,7 @@ def test_from_codes(self): def test_map(self): sc = tm.SubclassedCategorical(["a", "b", "c"]) - res = sc.map(lambda x: x.upper()) + res = sc.map(lambda x: x.upper(), na_action=None) assert isinstance(res, tm.SubclassedCategorical) exp = Categorical(["A", "B", "C"]) tm.assert_categorical_equal(res, exp) From 7e470c967951fde87e2caae17d11b2af75e48950 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 19 Mar 2023 16:43:38 +0000 Subject: [PATCH 11/15] fix docstrings --- pandas/core/arrays/categorical.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 7b97eb0cea5e9..7ad11105f3a7e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1257,10 +1257,10 @@ def map( >>> cat ['a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] - >>> cat.map(lambda x: x.upper()) + >>> cat.map(lambda x: x.upper(), na_action=None) ['A', 'B', 'C'] Categories (3, object): ['A', 'B', 'C'] - >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}, na_action=None) ['first', 'second', 'third'] Categories (3, object): ['first', 'second', 'third'] @@ -1271,19 +1271,19 @@ def map( >>> cat ['a', 'b', 'c'] Categories (3, object): ['a' < 'b' < 'c'] - >>> cat.map({'a': 3, 'b': 2, 'c': 1}) + >>> cat.map({'a': 3, 'b': 2, 'c': 1}, na_action=None) [3, 2, 1] Categories (3, int64): [3 < 2 < 1] If the mapping is not 
one-to-one an :class:`~pandas.Index` is returned: - >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}) + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}, na_action=None) Index(['first', 'second', 'first'], dtype='object') If a `dict` is used, all unmapped categories are mapped to `NaN` and the result is an :class:`~pandas.Index`: - >>> cat.map({'a': 'first', 'b': 'second'}) + >>> cat.map({'a': 'first', 'b': 'second'}, na_action=None) Index(['first', 'second', nan], dtype='object') """ if na_action is lib.no_default: From afb843b9e42ad687c93e0ef6073e712b7d6ac9d4 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 22 Mar 2023 07:34:44 +0000 Subject: [PATCH 12/15] fix rebase --- pandas/tests/apply/test_invalid_arg.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index a7c5a930ba863..81f85a7b191d4 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -75,11 +75,6 @@ def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action): s.map({1: 2}, na_action=input_na_action) -def test_map_datetimetz_na_action(): - values = date_range("2011-01-01", "2011-01-02", freq="H").tz_localize("Asia/Tokyo") - s = Series(values, name="XX") - - @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) @pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}]) def test_nested_renamer(frame_or_series, method, func): From ef5ed65f97c77ed168cbe1290aec7f2688b1fb1f Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Fri, 24 Mar 2023 11:20:57 +0000 Subject: [PATCH 13/15] simplify implementation --- pandas/core/arrays/categorical.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 7ad11105f3a7e..2665ca16e9279 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@
-1301,18 +1301,18 @@ def map( new_categories = self.categories.map(mapper) - not_dictlike_and_no_nans = not (is_dict_like(mapper) and np.nan not in mapper) + has_nans = np.any(self._codes == -1) - if na_action is None and not_dictlike_and_no_nans and np.any(self._codes == -1): - na_value = mapper(np.nan) if callable(mapper) else mapper[np.nan] - new_categories = new_categories.insert(len(new_categories), na_value) - return np.take(new_categories, self._codes) - elif new_categories.is_unique and not new_categories.hasnans: + na_val = np.nan + if na_action is None and has_nans: + na_val = mapper(np.nan) if callable(mapper) else mapper.get(np.nan, np.nan) + + if new_categories.is_unique and not new_categories.hasnans and na_val is np.nan: new_dtype = CategoricalDtype(new_categories, ordered=self.ordered) return self.from_codes(self._codes.copy(), dtype=new_dtype) - if np.any(self._codes == -1): - new_categories = new_categories.insert(len(new_categories), np.nan) + if has_nans: + new_categories = new_categories.insert(len(new_categories), na_val) return np.take(new_categories, self._codes) From be2d4512a735854ca9bf58aab13f072cb10838f1 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 29 Mar 2023 08:03:07 +0100 Subject: [PATCH 14/15] fix warn --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2665ca16e9279..f32659201aa5a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1287,7 +1287,7 @@ def map( Index(['first', 'second', nan], dtype='object') """ if na_action is lib.no_default: - warn( + warnings.warn( "The default value of 'ignore' for the `na_action` parameter in " "pandas.Categorical.map is deprecated and will be " "changed to 'None' in a future version. 
Please set na_action to the " From 9d0c325611537d8823735a277953fe360a4f4e4a Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 30 Mar 2023 07:28:03 +0100 Subject: [PATCH 15/15] fix comments --- pandas/core/apply.py | 3 ++- pandas/core/arrays/categorical.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index dda98685f1fbd..7710a3adc165d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1085,7 +1085,8 @@ def apply_standard(self) -> DataFrame | Series: # row-wise access # apply doesn't have a `na_action` keyword and for backward compat reasons # we need to give `na_action="ignore"` for categorical data. - # TODO: remove the `na_action="ignore"` has been removed from Categorical. + # TODO: remove the `na_action="ignore"` when that default has been changed in + # Categorical (GH51645). action = "ignore" if is_categorical_dtype(obj) else None mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f32659201aa5a..0184463e9e59b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1232,7 +1232,7 @@ def map( .. deprecated:: 2.1.0 - The dault value of 'ignore' has been deprecated and will be changed to + The default value of 'ignore' has been deprecated and will be changed to None in the future. Returns