From c6ad13d57c8157cd175a294f06c427f8a239dcea Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 2 May 2020 15:03:28 -0400 Subject: [PATCH 1/5] BUG: Setting DataFrame values via iloc aligns when arguments are lists --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/indexing.py | 4 ++++ pandas/tests/indexing/test_iloc.py | 16 ++++++++++++++++ pandas/tests/indexing/test_indexing.py | 16 ++++------------ 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5ebb2f20d22f5..b7bed86e77d3f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -718,6 +718,7 @@ Indexing - Bug in :meth:`Series.__getitem__` allowing missing labels with ``np.ndarray``, :class:`Index`, :class:`Series` indexers but not ``list``, these now all raise ``KeyError`` (:issue:`33646`) - Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` where index was assumed to be monotone increasing (:issue:`33756`) - Indexing with a list of strings representing datetimes failed on :class:`DatetimeIndex` or :class:`PeriodIndex`(:issue:`11278`) +- Bug in :meth:`DataFrame.iloc` when setting values with list arguments would align row/column labels (:issue:`22046`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b857a59195695..a44aa2400d482 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -662,6 +662,10 @@ def __setitem__(self, key, value): indexer = self._get_setitem_indexer(key) self._has_valid_setitem_indexer(key) + if self.name == "iloc" and isinstance(value, (ABCSeries, ABCDataFrame)): + # Strip labels so as to not align with RHS + value = value._values.copy() + iloc = self if self.name == "iloc" else self.obj.iloc iloc._setitem_with_indexer(indexer, value) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index c97cd81c84726..b9a4d3d10bff3 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -705,6 +705,15 @@ def test_iloc_setitem_categorical_updates_inplace(self): expected = pd.Categorical(["C", "B", "A"]) tm.assert_categorical_equal(cat, expected) + def test_iloc_setitem_frame_no_alignment(self): + # GH 22046 + # setting with iloc should not align labels + df = pd.DataFrame({"a": [1, 2], "b": [2, 3]}, index=[2, 1]) + expected = df.copy() + df2 = pd.DataFrame({"b": [1, 2], "a": [2, 3]}, index=[1, 2]) + df.iloc[:, [0, 1]] = df2.iloc[:, [0, 1]] + tm.assert_frame_equal(df, expected) + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): @@ -733,3 +742,10 @@ def test_iloc_setitem_series_duplicate_columns(self): ) df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64) assert df.dtypes.iloc[2] == np.int64 + + def test_iloc_settime_frame_duplicate_columns(self): + idx = pd.MultiIndex.from_tuples((("a", "a"), ("a", "a"))) + df = pd.DataFrame([[1, 1]], columns=idx) + expected = pd.DataFrame([[2, 2]], columns=idx) + df.iloc[:, [0, 1]] = expected.iloc[:, [0, 1]] + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 51a7aa9bb586b..fadc03f0a7d07 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -716,27 +716,19 @@ def test_rhs_alignment(self): # GH8258, tests that both rows & columns are aligned to what is # assigned to. covers both uniform data-type & multi-type cases def run_tests(df, rhs, right): - # label, index, slice - lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4) - lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3) + # labels + lbl_one = list("bcd") + lbl_two = ["joe", "jolie"] left = df.copy() left.loc[lbl_one, lbl_two] = rhs tm.assert_frame_equal(left, right) - left = df.copy() - left.iloc[idx_one, idx_two] = rhs - tm.assert_frame_equal(left, right) - - left = df.copy() - left.iloc[slice_one, slice_two] = rhs - tm.assert_frame_equal(left, right) - xs = np.arange(20).reshape(5, 4) cols = ["jim", "joe", "jolie", "joline"] df = DataFrame(xs, columns=cols, index=list("abcde")) - # right hand side; permute the indices and multiplpy by -2 + # right hand side; permute the indices and multiply by -2 rhs = -2 * df.iloc[3:0:-1, 2:0:-1] # expected `right` result; just multiply by -2 From b97d4c0f63937f2e931df1aec10e1e4b51dfd3bb Mon Sep 17 00:00:00 2001 From: Richard Date: Sun, 10 May 2020 15:38:26 -0400 Subject: [PATCH 2/5] trying test --- pandas/core/indexing.py | 3 ++- pandas/tests/groupby/test_nunique.py | 10 +++++++--- pandas/tests/indexing/test_indexing.py | 9 +++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a44aa2400d482..a0efd22799d65 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -664,7 +664,8 @@ def __setitem__(self, key, value): if self.name == "iloc" and isinstance(value, (ABCSeries, ABCDataFrame)): # Strip labels so as to not align with RHS - value = value._values.copy() + # value = value._values.copy() + pass iloc = self if self.name == "iloc" else self.obj.iloc iloc._setitem_with_indexer(indexer, value) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index 952443e0ad23b..d5940cc0e9c53 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -25,7 +25,10 @@ def check_nunique(df, keys, as_index=True): if not as_index: right = right.reset_index(drop=True) - tm.assert_series_equal(left, right, check_names=False) + if not as_index: + tm.assert_frame_equal(left, right, check_names=False) + else: + tm.assert_series_equal(left, right, check_names=False) tm.assert_frame_equal(df, original_df) days = date_range("2015-08-23", periods=10) @@ -56,11 +59,12 @@ def check_nunique(df, keys, as_index=True): def test_nunique(): df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")}) - expected = DataFrame({"A": [1] * 3, "B": [1, 2, 1], "C": [1, 1, 2]}) + expected = DataFrame({"A": list("abc"), "B": [1, 2, 1], "C": [1, 1, 2]}) result = df.groupby("A", as_index=False).nunique() tm.assert_frame_equal(result, expected) # as_index + expected = expected.drop(columns="A") expected.index = list("abc") expected.index.name = "A" result = df.groupby("A").nunique() @@ -71,7 +75,7 @@ def test_nunique(): tm.assert_frame_equal(result, expected) # dropna - expected = DataFrame({"A": [1] * 3, "B": [1] * 3, "C": [1] * 3}, index=list("abc")) + expected = DataFrame({"B": [1] * 3, "C": [1] * 3}, index=list("abc")) expected.index.name = "A" result = df.replace({"x": None}).groupby("A").nunique() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index fadc03f0a7d07..a2c3141cc9138 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -620,6 +620,15 @@ def test_astype_assignment(self): expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) + def test_32bit_assignment(self): + df = DataFrame( + [[1, 2, 3]], columns=list("ABC") + ) + + expected = df.copy() + df.iloc[:, :] = df._values + tm.assert_frame_equal(df, expected) + def test_index_type_coercion(self): # GH 11836 From 44bdd559457cff6e3b1cf950d7b1f7d6a59b9432 Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 16 May 2020 09:02:30 -0400 Subject: [PATCH 3/5] Reverted change, added test to see if it fails --- pandas/core/indexing.py | 8 ++++---- pandas/tests/groupby/test_nunique.py | 10 +++------- pandas/tests/indexing/test_indexing.py | 15 ++++++++++----- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a0efd22799d65..33287a197cb91 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -662,10 +662,10 @@ def __setitem__(self, key, value): indexer = self._get_setitem_indexer(key) self._has_valid_setitem_indexer(key) - if self.name == "iloc" and isinstance(value, (ABCSeries, ABCDataFrame)): - # Strip labels so as to not align with RHS - # value = value._values.copy() - pass + # Reverted for testing path on master + # if self.name == "iloc" and isinstance(value, (ABCSeries, ABCDataFrame)): + # # Strip labels so as to not align with RHS + # value = value._values.copy() iloc = self if self.name == "iloc" else self.obj.iloc iloc._setitem_with_indexer(indexer, value) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index d5940cc0e9c53..952443e0ad23b 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -25,10 +25,7 @@ def check_nunique(df, keys, as_index=True): if not as_index: right = right.reset_index(drop=True) - if not as_index: - tm.assert_frame_equal(left, right, check_names=False) - else: - tm.assert_series_equal(left, right, check_names=False) + tm.assert_series_equal(left, right, check_names=False) tm.assert_frame_equal(df, original_df) days = date_range("2015-08-23", periods=10) @@ -59,12 +56,11 @@ def check_nunique(df, keys, as_index=True): def test_nunique(): df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")}) - expected = DataFrame({"A": list("abc"), "B": [1, 2, 1], "C": [1, 1, 2]}) + expected = DataFrame({"A": [1] * 3, "B": [1, 2, 1], "C": [1, 1, 2]}) result = df.groupby("A", as_index=False).nunique() tm.assert_frame_equal(result, expected) # as_index - expected = expected.drop(columns="A") expected.index = list("abc") expected.index.name = "A" result = df.groupby("A").nunique() @@ -75,7 +71,7 @@ def test_nunique(): tm.assert_frame_equal(result, expected) # dropna - expected = DataFrame({"B": [1] * 3, "C": [1] * 3}, index=list("abc")) + expected = DataFrame({"A": [1] * 3, "B": [1] * 3, "C": [1] * 3}, index=list("abc")) expected.index.name = "A" result = df.replace({"x": None}).groupby("A").nunique() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index a2c3141cc9138..917741ef296f6 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -620,13 +620,18 @@ def test_astype_assignment(self): expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) - def test_32bit_assignment(self): - df = DataFrame( - [[1, 2, 3]], columns=list("ABC") + def test_astype_assignment_nolabel(self): + + # GH4312 (iloc) + df_orig = DataFrame( + [["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) - expected = df.copy() - df.iloc[:, :] = df._values + df = df_orig.copy() + df.iloc[:, 0:2] = df.iloc[:, 0:2].values.astype(np.int64) + expected = DataFrame( + [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) tm.assert_frame_equal(df, expected) def test_index_type_coercion(self): From f88a75793a9a60c49a3d4abe2b6307b46434c1db Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 16 May 2020 12:50:00 -0400 Subject: [PATCH 4/5] Trying bugfix --- pandas/core/indexing.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 33287a197cb91..a004a004ea143 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -662,10 +662,9 @@ def __setitem__(self, key, value): indexer = self._get_setitem_indexer(key) self._has_valid_setitem_indexer(key) - # Reverted for testing path on master - # if self.name == "iloc" and isinstance(value, (ABCSeries, ABCDataFrame)): - # # Strip labels so as to not align with RHS - # value = value._values.copy() + if self.name == "iloc" and isinstance(value, (ABCSeries, ABCDataFrame)): + # Strip labels so as to not align with RHS + value = value._values.copy() iloc = self if self.name == "iloc" else self.obj.iloc iloc._setitem_with_indexer(indexer, value) @@ -1710,10 +1709,12 @@ def isetter(loc, v): # But we may be relying on the ndarray coercion to check ndim. # Why not just convert to an ndarray earlier on if needed? elif np.ndim(value) == 2: + from pandas import DataFrame # note that this coerces the dtype if we are mixed # GH 7551 - value = np.array(value, dtype=object) + # value = np.array(value, dtype=object) + value = DataFrame(value) if len(ilocs) != value.shape[1]: raise ValueError( "Must have equal len keys and value " @@ -1722,7 +1723,8 @@ def isetter(loc, v): for i, loc in enumerate(ilocs): # setting with a list, re-coerces - isetter(loc, value[:, i].tolist()) + # isetter(loc, value[:, i].tolist()) + isetter(loc, value.iloc[:, i]) elif ( len(labels) == 1 From a734cde855ee224ad18e94ab9d3aa755c85cdb2e Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 16 May 2020 13:30:23 -0400 Subject: [PATCH 5/5] Trying bugfix --- pandas/core/indexing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a004a004ea143..5cd9c8db1a1b2 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1709,12 +1709,10 @@ def isetter(loc, v): # But we may be relying on the ndarray coercion to check ndim. # Why not just convert to an ndarray earlier on if needed? elif np.ndim(value) == 2: - from pandas import DataFrame # note that this coerces the dtype if we are mixed # GH 7551 - # value = np.array(value, dtype=object) - value = DataFrame(value) + value = np.array(value, dtype=object) if len(ilocs) != value.shape[1]: raise ValueError( "Must have equal len keys and value " @@ -1724,7 +1722,7 @@ def isetter(loc, v): for i, loc in enumerate(ilocs): # setting with a list, re-coerces # isetter(loc, value[:, i].tolist()) - isetter(loc, value.iloc[:, i]) + isetter(loc, list(value[:, i])) elif ( len(labels) == 1