diff --git a/pandas/conftest.py b/pandas/conftest.py
index f9c10a7758bd2..8fe4afcf4fab0 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -402,10 +402,18 @@ def nselect_method(request):
     return request.param
 
 
-@pytest.fixture(params=[None, "ignore"])
+@pytest.fixture(params=[None, "ignore", "raise"])
 def na_action(request):
     """
-    Fixture for 'na_action' argument in map.
+    Fixture for 'na_action' argument in Series.map.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[None, "ignore"])
+def na_action_frame(request):
+    """
+    Fixture for 'na_action' argument in DataFrame.map.
     """
     return request.param
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index aafd802b827a5..c14c504eb91fb 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1627,7 +1627,7 @@ def union_with_duplicates(
 def map_array(
     arr: ArrayLike,
     mapper,
-    na_action: Literal["ignore"] | None = None,
+    na_action: Literal["ignore", "raise"] | None = None,
 ) -> np.ndarray | ExtensionArray | Index:
     """
     Map values using an input mapping or function.
@@ -1636,9 +1636,12 @@ def map_array(
     ----------
     mapper : function, dict, or Series
         Mapping correspondence.
-    na_action : {None, 'ignore'}, default None
+    na_action : {None, 'ignore', 'raise'}, default None
         If 'ignore', propagate NA values, without passing them to the
-        mapping correspondence.
+        mapping correspondence. If 'raise', an error is raised when the
+        array contains non-NA values which do not exist as keys in the mapping
+        correspondence (does not apply to function & dict-like mappers with
+        a '__missing__' attribute).
 
     Returns
     -------
@@ -1647,7 +1650,7 @@ def map_array(
         If the function returns a tuple with more than one element
         a MultiIndex will be returned.
     """
-    if na_action not in (None, "ignore"):
-        msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
+    if na_action not in (None, "ignore", "raise"):
+        msg = f"na_action must be 'ignore', 'raise' or None, {na_action} was passed"
         raise ValueError(msg)
 
@@ -1686,6 +1689,10 @@ def map_array(
         # Since values were input this means we came from either
         # a dict or a series and mapper should be an index
         indexer = mapper.index.get_indexer(arr)
+
+        if na_action == "raise" and (indexer == -1).any():
+            raise ValueError("At least one value is not covered in the mapping!")
+
         new_values = take_nd(mapper._values, indexer)
 
         return new_values
diff --git a/pandas/core/series.py b/pandas/core/series.py
index da46f8ede3409..0bbd0fc4e0d09 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4321,7 +4321,7 @@ def unstack(
     def map(
         self,
         arg: Callable | Mapping | Series,
-        na_action: Literal["ignore"] | None = None,
+        na_action: Literal["ignore", "raise"] | None = None,
         **kwargs,
     ) -> Series:
         """
@@ -4335,9 +4335,11 @@ def map(
         ----------
         arg : function, collections.abc.Mapping subclass or Series
             Mapping correspondence.
-        na_action : {None, 'ignore'}, default None
+        na_action : {None, 'ignore', 'raise'}, default None
             If 'ignore', propagate NaN values, without passing them to the
-            mapping correspondence.
+            mapping correspondence. With 'raise' a missing value in the mapping
+            correspondence raises a ``ValueError`` instead of replacing it
+            with ``NaN``.
         **kwargs
             Additional keyword arguments to pass as keywords arguments to
             `arg`.
@@ -4359,7 +4361,8 @@ def map(
         Notes
         -----
         When ``arg`` is a dictionary, values in Series that are not in the
-        dictionary (as keys) are converted to ``NaN``. However, if the
+        dictionary (as keys) are converted to ``NaN``. This conversion
+        can be anticipated with ``na_action = 'raise'``. However, if the
         dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
         provides a method for default values), then this default is used
         rather than ``NaN``.
diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py
index 9850de14b2092..d3fd1b6216c7a 100644
--- a/pandas/tests/frame/methods/test_map.py
+++ b/pandas/tests/frame/methods/test_map.py
@@ -33,7 +33,7 @@ def test_map_float_object_conversion(val):
     assert result == object
 
 
-def test_map_keeps_dtype(na_action):
+def test_map_keeps_dtype(na_action_frame):
     # GH52219
     arr = Series(["a", np.nan, "b"])
     sparse_arr = arr.astype(pd.SparseDtype(object))
@@ -42,7 +42,7 @@ def test_map_keeps_dtype(na_action):
     def func(x):
         return str.upper(x) if not pd.isna(x) else x
 
-    result = df.map(func, na_action=na_action)
+    result = df.map(func, na_action=na_action_frame)
 
     expected_sparse = pd.array(["A", np.nan, "B"], dtype=pd.SparseDtype(object))
     expected_arr = expected_sparse.astype(object)
@@ -50,7 +50,7 @@ def func(x):
 
     tm.assert_frame_equal(result, expected)
 
-    result_empty = df.iloc[:0, :].map(func, na_action=na_action)
+    result_empty = df.iloc[:0, :].map(func, na_action=na_action_frame)
     expected_empty = expected.iloc[:0, :]
     tm.assert_frame_equal(result_empty, expected_empty)
 
diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py
index 84b60a2afe6eb..a912517a13e45 100644
--- a/pandas/tests/series/methods/test_map.py
+++ b/pandas/tests/series/methods/test_map.py
@@ -322,6 +322,19 @@ def test_map_dict_na_key():
     tm.assert_series_equal(result, expected)
 
 
+def test_map_missing_key(na_action):
+    s = Series([1, 2, 42])
+    mapping = {1: "a", 2: "b", 3: "c"}
+
+    if na_action == "raise":
+        with pytest.raises(ValueError, match="not covered"):
+            s.map(mapping, na_action=na_action)
+    else:
+        expected = Series(["a", "b", np.nan])
+        result = s.map(mapping, na_action=na_action)
+        tm.assert_series_equal(result, expected)
+
+
 def test_map_defaultdict_na_key(na_action):
     # GH 48813
     s = Series([1, 2, np.nan])
@@ -380,7 +393,7 @@ def test_map_categorical_na_ignore(na_action, expected):
     tm.assert_series_equal(result, expected)
 
 
-def test_map_dict_subclass_with_missing():
+def test_map_dict_subclass_with_missing(na_action):
     """
     Test Series.map with a dictionary subclass that defines __missing__,
     i.e. sets a default value (GH #15999).
@@ -392,30 +405,40 @@ def __missing__(self, key):
 
     s = Series([1, 2, 3])
     dictionary = DictWithMissing({3: "three"})
-    result = s.map(dictionary)
+    result = s.map(dictionary, na_action=na_action)  # also works with 'raise'
     expected = Series(["missing", "missing", "three"])
     tm.assert_series_equal(result, expected)
 
 
-def test_map_dict_subclass_without_missing():
+def test_map_dict_subclass_without_missing(na_action):
     class DictWithoutMissing(dict):
         pass
 
     s = Series([1, 2, 3])
     dictionary = DictWithoutMissing({3: "three"})
-    result = s.map(dictionary)
-    expected = Series([np.nan, np.nan, "three"])
-    tm.assert_series_equal(result, expected)
 
+    if na_action == "raise":
+        with pytest.raises(ValueError, match="not covered"):
+            s.map(dictionary, na_action=na_action)
+    else:
+        result = s.map(dictionary, na_action=na_action)
+        expected = Series([np.nan, np.nan, "three"])
+        tm.assert_series_equal(result, expected)
 
-def test_map_abc_mapping(non_dict_mapping_subclass):
+
+def test_map_abc_mapping(non_dict_mapping_subclass, na_action):
     # https://github.com/pandas-dev/pandas/issues/29733
     # Check collections.abc.Mapping support as mapper for Series.map
     s = Series([1, 2, 3])
     not_a_dictionary = non_dict_mapping_subclass({3: "three"})
-    result = s.map(not_a_dictionary)
-    expected = Series([np.nan, np.nan, "three"])
-    tm.assert_series_equal(result, expected)
+
+    if na_action == "raise":
+        with pytest.raises(ValueError, match="not covered"):
+            s.map(not_a_dictionary, na_action=na_action)
+    else:
+        result = s.map(not_a_dictionary, na_action=na_action)
+        expected = Series([np.nan, np.nan, "three"])
+        tm.assert_series_equal(result, expected)
 
 
 def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):