From e2ddcdc3bc0011137d0094a0131302ce7df81515 Mon Sep 17 00:00:00 2001 From: Kee Chong Tan Date: Mon, 27 Jan 2020 12:34:42 +0000 Subject: [PATCH] Backport PR #29393: BUG: GH25495 incorrect dtype when using .loc to set Categorical value for column in 1-row DataFrame --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/internals/blocks.py | 23 ++++++++++++++----- .../tests/frame/indexing/test_categorical.py | 10 ++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 81d8f3c1a1e0b..14a46e7e9b909 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1091,6 +1091,7 @@ Indexing - Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`) - :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`) - Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) +- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`) - :meth:`MultiIndex.get_loc` can't find missing values when input includes missing values (:issue:`19132`) - Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) - Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a8a3b896f7b31..5fcd796eb41ed 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -872,7 +872,11 @@ def setitem(self, indexer, value): # length checking check_setitem_lengths(indexer, value, values) - + exact_match = ( + len(arr_value.shape) + and arr_value.shape[0] == values.shape[0] + and arr_value.size == values.size + ) if is_empty_indexer(indexer, arr_value): # GH#8669 empty indexers pass @@ -882,14 +886,21 @@ def setitem(self, indexer, value): # be e.g. a list; see GH#6043 values[indexer] = value - # if we are an exact match (ex-broadcasting), - # then use the resultant dtype elif ( - len(arr_value.shape) - and arr_value.shape[0] == values.shape[0] - and arr_value.size == values.size + exact_match + and is_categorical_dtype(arr_value.dtype) + and not is_categorical_dtype(values) ): + # GH25495 - If the current dtype is not categorical, + # we need to create a new categorical block values[indexer] = value + return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) + + # if we are an exact match (ex-broadcasting), + # then use the resultant dtype + elif exact_match: + values[indexer] = value + try: values = values.astype(arr_value.dtype) except ValueError: diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index 5de38915f04c1..a29c193676db2 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -354,6 +354,16 @@ def test_functions_no_warnings(self): df.value, range(0, 105, 10), right=False, labels=labels ) + def test_setitem_single_row_categorical(self): + # GH 25495 + df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) + categories = pd.Categorical(df["Alpha"], categories=["a", "b", "c"]) + df.loc[:, "Alpha"] = categories + + result = df["Alpha"] + expected = Series(categories, index=df.index, name="Alpha") + tm.assert_series_equal(result, expected) + def test_loc_indexing_preserves_index_category_dtype(self): # GH 15166 df = DataFrame(