diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 393866b92771b..beccc1e6dfe21 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -239,6 +239,7 @@ Indexing - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`) - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) +- Bug in :meth:`DataFrame.at` and :meth:`Series.at` failing to adjust the dtype when a float was assigned to an integer column (:issue:`26395`, :issue:`20643`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c32483aa2a231..bc6a80bd6fe93 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -93,6 +93,7 @@ maybe_convert_platform, maybe_downcast_to_dtype, maybe_infer_to_datetimelike, + maybe_upcast, validate_numeric_casting, ) from pandas.core.dtypes.common import ( @@ -3289,13 +3290,8 @@ def _set_value(self, index, col, value, takeable: bool = False): return series = self._get_item_cache(col) - engine = self.index._engine - loc = engine.get_loc(index) - validate_numeric_casting(series.dtype, value) - - series._values[loc] = value - # Note: trying to use series._set_value breaks tests in - # tests.frame.indexing.test_indexing and tests.indexing.test_partial + self.index._engine.get_loc(index) + series._set_value(index, value, takeable) except (KeyError, TypeError): # set using a non-recursive method & reset the cache if takeable: diff --git a/pandas/core/series.py b/pandas/core/series.py index 1a85e88dbdb99..51e7613c40fe3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -42,6 +42,7 @@
from pandas.core.dtypes.cast import ( convert_dtypes, + infer_dtype_from_scalar, maybe_cast_to_extension_array, validate_numeric_casting, ) @@ -50,10 +51,12 @@ is_bool, is_categorical_dtype, is_dict_like, + is_dtype_equal, is_extension_array_dtype, is_integer, is_iterator, is_list_like, + is_numeric_dtype, is_object_dtype, is_scalar, validate_all_hashable, @@ -972,7 +975,15 @@ def _set_with_engine(self, key, value): # fails with AttributeError for IntervalIndex loc = self.index._engine.get_loc(key) validate_numeric_casting(self.dtype, value) - self._values[loc] = value + dtype, _ = infer_dtype_from_scalar(value) + if is_dtype_equal(self.dtype, dtype): + self._values[loc] = value + else: + # This only raises when index contains tuples + try: + self.loc[key] = value + except KeyError: + self._values[loc] = value def _set_with(self, key, value): # other: fancy integer or otherwise @@ -1034,8 +1045,16 @@ def _set_value(self, label, value, takeable: bool = False): takeable : interpret the index as indexers, default False """ try: - if takeable: + dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) + if takeable and is_dtype_equal(self.dtype, dtype): self._values[label] = value + elif takeable: + self.iloc[label] = value + elif not is_dtype_equal(self.dtype, dtype) and is_numeric_dtype(dtype): + loc = self.index.get_loc(label) + validate_numeric_casting(self.dtype, value) + self.loc[label] = value + return else: loc = self.index.get_loc(label) validate_numeric_casting(self.dtype, value) diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index fbf33999386e6..5b71d3ee066e5 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -153,3 +153,33 @@ def test_at_getitem_mixed_index_no_fallback(self): ser.at[0] with pytest.raises(KeyError, match="^4$"): ser.at[4] + + +@pytest.mark.parametrize("func", ["at", "loc"]) +def test_at_assign_float_to_int_frame(func): + # GH: 26395 + obj = DataFrame([0, 0, 0], 
index=["A", "B", "C"], columns=["D"]) + getattr(obj, func)["C", "D"] = 44.5 + expected = DataFrame([0, 0, 44.5], index=["A", "B", "C"], columns=["D"]) + tm.assert_frame_equal(obj, expected) + + +@pytest.mark.parametrize("func", ["at", "loc"]) +def test_at_assign_float_to_int_series(func): + # GH: 26395 + obj = Series([0, 0, 0], index=["A", "B", "C"]) + getattr(obj, func)["C"] = 44.5 + expected = Series([0, 0, 44.5], index=["A", "B", "C"]) + tm.assert_series_equal(obj, expected) + + +def test_assign_float_to_int_series_takeable(): + # GH: 20643 + ser = Series([0, 1, 2], index=list("abc")) + ser.iat[1] = 3.1 + expected = Series([0, 3.1, 2], index=list("abc")) + tm.assert_series_equal(ser, expected) + + ser = Series([0, 1, 2], index=list("abc")) + ser.at["b"] = 3.1 + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 41f967ce32796..63d77acf478c2 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -114,31 +114,17 @@ def test_setitem_series_int64(self, val, exp_dtype, request): obj = pd.Series([1, 2, 3, 4]) assert obj.dtype == np.int64 - if exp_dtype is np.float64: - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) - mark = pytest.mark.xfail(reason="GH12747 The result must be float") - request.node.add_marker(mark) - exp = pd.Series([1, val, 3, 4]) self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) @pytest.mark.parametrize( - "val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)] + "val,exp_dtype", [(np.int32(1), np.int32), (np.int16(2 ** 9), np.int16)] ) def test_setitem_series_int8(self, val, exp_dtype, request): obj = pd.Series([1, 2, 3, 4], dtype=np.int8) assert obj.dtype == np.int8 - if exp_dtype is np.int16: - exp = pd.Series([1, 0, 3, 4], dtype=np.int8) - self._assert_setitem_series_conversion(obj, val, exp, np.int8) - mark = pytest.mark.xfail( - 
reason="BUG: it must be pd.Series([1, 1, 3, 4], dtype=np.int16" - ) - request.node.add_marker(mark) - - exp = pd.Series([1, val, 3, 4], dtype=np.int8) + exp = pd.Series([1, val, 3, 4], dtype=exp_dtype) self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) @pytest.mark.parametrize( @@ -171,10 +157,10 @@ def test_setitem_series_complex128(self, val, exp_dtype): @pytest.mark.parametrize( "val,exp_dtype", [ - (1, np.int64), - (3, np.int64), - (1.1, np.float64), - (1 + 1j, np.complex128), + (1, "object"), + (3, "object"), + (1.1, "object"), + (1 + 1j, "object"), (True, np.bool_), ], ) @@ -182,22 +168,6 @@ def test_setitem_series_bool(self, val, exp_dtype, request): obj = pd.Series([True, False, True, False]) assert obj.dtype == np.bool_ - mark = None - if exp_dtype is np.int64: - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, val, exp, np.bool_) - mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be int") - elif exp_dtype is np.float64: - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, val, exp, np.bool_) - mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be float") - elif exp_dtype is np.complex128: - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, val, exp, np.bool_) - mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be complex") - if mark is not None: - request.node.add_marker(mark) - exp = pd.Series([True, val, True, False]) self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index f2d628c70ae62..7518d825e922a 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -47,7 +47,6 @@ def test_partial_setting(self): with pytest.raises(IndexError, match=msg): s.iloc[3] = 5.0 - msg = "index 3 is out of bounds for axis 0 with size 3" with 
pytest.raises(IndexError, match=msg): s.iat[3] = 5.0 diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index d6d0723bee0e8..65da24f6816de 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -394,3 +394,11 @@ def test_setitem_slice_into_readonly_backing_data(): series[1:3] = 1 assert not array.any() + + +def test_setitem_float_to_int(): + # GH 20643 + ser = Series([0, 1, 2], index=list("abc")) + ser["b"] = 3.1 + expected = Series([0, 3.1, 2], index=list("abc")) + tm.assert_series_equal(ser, expected)