From 4c7722ee4a4711a43f11ec6522564be750a09f21 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Mon, 25 Apr 2022 13:37:50 +0300 Subject: [PATCH 01/22] merge non string columns --- pandas/core/reshape/merge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index fbcf8a88d2fee..b3be01d5e42d0 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2383,14 +2383,14 @@ def renamer(x, suffix): Parameters ---------- - x : original column name + x : original column suffix : str or None Returns ------- - x : renamed column name + x : renamed column """ - if x in to_rename and suffix is not None: + if x in to_rename and isinstance(x, str) and suffix is not None: return f"{x}{suffix}" return x From cb22a1e64a03a57c7b1ed78da1f2b7e2d5187b5e Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Tue, 26 Apr 2022 23:04:59 +0300 Subject: [PATCH 02/22] fix test --- doc/source/whatsnew/v1.5.0.rst | 6 ++++++ pandas/core/reshape/merge.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 13 +++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e4879a6c41515..abe36c07bf26f 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -511,6 +511,12 @@ Strings - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. 
Now raises ``TypeError`` (:issue:`3485`) - +Merge +^^^^^^^ +- Bug in :meth:`merge` supports complex types merge with identical columns +- + + Interval ^^^^^^^^ - Bug in :meth:`IntervalArray.__setitem__` when setting ``np.nan`` into an integer-backed array raising ``ValueError`` instead of ``TypeError`` (:issue:`45484`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index b3be01d5e42d0..d7ba96a70627b 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2390,7 +2390,7 @@ def renamer(x, suffix): ------- x : renamed column """ - if x in to_rename and isinstance(x, str) and suffix is not None: + if x in to_rename and isinstance(x, (str, int, float)) and suffix is not None: return f"{x}{suffix}" return x diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index ccdfc3cd23790..6c6a7134b950c 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2669,3 +2669,16 @@ def test_merge_different_index_names(): result = merge(left, right, left_on="c", right_on="d") expected = DataFrame({"a_x": [1], "a_y": 1}) tm.assert_frame_equal(result, expected) + + +def test_merge_complex_column(): + class Column: + def __init__(self, name): + self.name = name + + merged_column = Column(name='Z') + left = DataFrame({merged_column: [1], 'X': [2]}) + right = DataFrame({merged_column: [1], 'Y': [6]}) + result = merge(left, right, left_index=True, right_index=True) + expected = DataFrame([[1, 2, 1, 6]], columns=[merged_column, 'X', merged_column, 'Y']) + tm.assert_frame_equal(result, expected) \ No newline at end of file From 7342a726c544f54478fa71f8bbd652a8472710f4 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Tue, 26 Apr 2022 23:05:32 +0300 Subject: [PATCH 03/22] add new line --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py 
b/pandas/tests/reshape/merge/test_merge.py index 6c6a7134b950c..c26cbff010bf0 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2681,4 +2681,4 @@ def __init__(self, name): right = DataFrame({merged_column: [1], 'Y': [6]}) result = merge(left, right, left_index=True, right_index=True) expected = DataFrame([[1, 2, 1, 6]], columns=[merged_column, 'X', merged_column, 'Y']) - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) From ac929ea9c748dedd68013916135e4ded815afed1 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Tue, 26 Apr 2022 23:06:30 +0300 Subject: [PATCH 04/22] fix lint --- pandas/tests/reshape/merge/test_merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c26cbff010bf0..7ab80b2698f9b 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2680,5 +2680,6 @@ def __init__(self, name): left = DataFrame({merged_column: [1], 'X': [2]}) right = DataFrame({merged_column: [1], 'Y': [6]}) result = merge(left, right, left_index=True, right_index=True) - expected = DataFrame([[1, 2, 1, 6]], columns=[merged_column, 'X', merged_column, 'Y']) + expected = DataFrame([[1, 2, 1, 6]], + columns=[merged_column, 'X', merged_column, 'Y']) tm.assert_frame_equal(result, expected) From 5f3f90e3828e39611e7c362b9181ea927f4fbca1 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Wed, 27 Apr 2022 01:29:11 +0300 Subject: [PATCH 05/22] fix black --- pandas/tests/reshape/merge/test_merge.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7ab80b2698f9b..2641415de5e51 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2680,6 +2680,7 @@ def __init__(self, 
name): left = DataFrame({merged_column: [1], 'X': [2]}) right = DataFrame({merged_column: [1], 'Y': [6]}) result = merge(left, right, left_index=True, right_index=True) - expected = DataFrame([[1, 2, 1, 6]], - columns=[merged_column, 'X', merged_column, 'Y']) + expected = DataFrame( + [[1, 2, 1, 6]], columns=[merged_column, 'X', merged_column, 'Y'] + ) tm.assert_frame_equal(result, expected) From 496419d17c9cba592aedd2f39579916f73722b0c Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Wed, 27 Apr 2022 01:39:41 +0300 Subject: [PATCH 06/22] fix black for real --- pandas/tests/reshape/merge/test_merge.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2641415de5e51..b60cc2e4df069 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2676,11 +2676,11 @@ class Column: def __init__(self, name): self.name = name - merged_column = Column(name='Z') - left = DataFrame({merged_column: [1], 'X': [2]}) - right = DataFrame({merged_column: [1], 'Y': [6]}) + merged_column = Column(name="Z") + left = DataFrame({merged_column: [1], "X": [2]}) + right = DataFrame({merged_column: [1], "Y": [6]}) result = merge(left, right, left_index=True, right_index=True) expected = DataFrame( - [[1, 2, 1, 6]], columns=[merged_column, 'X', merged_column, 'Y'] + [[1, 2, 1, 6]], columns=[merged_column, "X", merged_column, "Y"] ) tm.assert_frame_equal(result, expected) From 4af2f8940e611a4014b0eb2d9cd73f67c519d12e Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Thu, 28 Apr 2022 20:41:38 +0300 Subject: [PATCH 07/22] pr comments --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index abe36c07bf26f..0bed43c01c6e1 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -513,7 +513,7 @@ Strings Merge 
^^^^^^^ -- Bug in :meth:`merge` supports complex types merge with identical columns +- Bug in :meth:`merge` supports user-defined types merge with identical columns - From 9fef0f3da38e93ea23141c53b33d647d4dd806db Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Mon, 2 May 2022 20:10:25 +0300 Subject: [PATCH 08/22] __add__ for adding suffix --- pandas/core/reshape/merge.py | 7 +- pandas/tests/reshape/merge/test_merge.py | 149 ++++++++++++----------- 2 files changed, 84 insertions(+), 72 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d7ba96a70627b..6c1dcffeaefbb 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2390,8 +2390,11 @@ def renamer(x, suffix): ------- x : renamed column """ - if x in to_rename and isinstance(x, (str, int, float)) and suffix is not None: - return f"{x}{suffix}" + if x in to_rename and suffix is not None: + try: + return x + suffix + except TypeError: + return f"{x}{suffix}" return x lrenamer = partial(renamer, suffix=lsuffix) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index b60cc2e4df069..9b6ee5635cb11 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -652,8 +652,8 @@ def test_merge_nan_right(self): None: {0: 0, 1: 0}, } ) - .set_index(None) - .reset_index()[["i1", "i2", "i1_", "i3"]] + .set_index(None) + .reset_index()[["i1", "i2", "i1_", "i3"]] ) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -731,7 +731,6 @@ def test_other_datetime_unit(self): "datetime64[us]", "datetime64[ns]", ]: - df2 = s.astype(dtype).to_frame("days") # coerces to datetime64[ns], thus should not be affected assert df2["days"].dtype == "datetime64[ns]" @@ -832,9 +831,9 @@ def test_merge_datetime64tz_values(self): { "key": [1, 2, 3], "value_x": list(pd.date_range("20151010", periods=2, tz="US/Eastern")) - + [pd.NaT], + + [pd.NaT], "value_y": [pd.NaT] - + 
list(pd.date_range("20151011", periods=2, tz="US/Eastern")), + + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), } ) result = merge(left, right, on="key", how="outer") @@ -1297,33 +1296,33 @@ def test_merge_two_empty_df_no_division_error(self): "index,expected_index", [ ( - CategoricalIndex([1, 2, 4]), - CategoricalIndex([1, 2, 4, None, None, None]), + CategoricalIndex([1, 2, 4]), + CategoricalIndex([1, 2, 4, None, None, None]), ), ( - DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"]), - DatetimeIndex( - ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT] - ), + DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"]), + DatetimeIndex( + ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT] + ), ), (Float64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), (Int64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), ( - IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)]), - IntervalIndex.from_tuples( - [(1, 2), (2, 3), (3, 4), np.nan, np.nan, np.nan] - ), + IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)]), + IntervalIndex.from_tuples( + [(1, 2), (2, 3), (3, 4), np.nan, np.nan, np.nan] + ), ), ( - PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"), - PeriodIndex( - ["2001-01-01", "2001-01-02", "2001-01-03", pd.NaT, pd.NaT, pd.NaT], - freq="D", - ), + PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"), + PeriodIndex( + ["2001-01-01", "2001-01-02", "2001-01-03", pd.NaT, pd.NaT, pd.NaT], + freq="D", + ), ), ( - TimedeltaIndex(["1d", "2d", "3d"]), - TimedeltaIndex(["1d", "2d", "3d", pd.NaT, pd.NaT, pd.NaT]), + TimedeltaIndex(["1d", "2d", "3d"]), + TimedeltaIndex(["1d", "2d", "3d", pd.NaT, pd.NaT, pd.NaT]), ), ], ) @@ -1595,14 +1594,14 @@ def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals): ([0.0, 1.0, 2.0], ["0", "1", "2"]), ([0, 1, 2], ["0", "1", "2"]), ( - pd.date_range("1/1/2011", periods=2, freq="D"), - ["2011-01-01", "2011-01-02"], + 
pd.date_range("1/1/2011", periods=2, freq="D"), + ["2011-01-01", "2011-01-02"], ), (pd.date_range("1/1/2011", periods=2, freq="D"), [0, 1]), (pd.date_range("1/1/2011", periods=2, freq="D"), [0.0, 1.0]), ( - pd.date_range("20130101", periods=3), - pd.date_range("20130101", periods=3, tz="US/Eastern"), + pd.date_range("20130101", periods=3), + pd.date_range("20130101", periods=3, tz="US/Eastern"), ), ], ) @@ -2029,7 +2028,7 @@ def test_dtype_on_categorical_dates(self): ], ) def test_merging_with_bool_or_int_cateorical_column( - self, category_column, categories, expected_categories, ordered + self, category_column, categories, expected_categories, ordered ): # GH 17187 # merging with a boolean/int categorical column @@ -2068,44 +2067,44 @@ class TestMergeOnIndexes: ("inner", False, DataFrame({"a": [20, 10], "b": [200, 100]}, index=[2, 1])), ("inner", True, DataFrame({"a": [10, 20], "b": [100, 200]}, index=[1, 2])), ( - "left", - False, - DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), + "left", + False, + DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), ), ( - "left", - True, - DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), + "left", + True, + DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), ), ( - "right", - False, - DataFrame( - {"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2] - ), + "right", + False, + DataFrame( + {"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2] + ), ), ( - "right", - True, - DataFrame( - {"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3] - ), + "right", + True, + DataFrame( + {"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3] + ), ), ( - "outer", - False, - DataFrame( - {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, - index=[0, 1, 2, 3], - ), + "outer", + False, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), ), ( - "outer", - True, - 
DataFrame( - {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, - index=[0, 1, 2, 3], - ), + "outer", + True, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), ), ], ) @@ -2229,20 +2228,20 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): "how,expected", [ ( - "right", - DataFrame( - {"A": [100, 200, 300], "B1": [60, 70, np.nan], "B2": [600, 700, 800]} - ), + "right", + DataFrame( + {"A": [100, 200, 300], "B1": [60, 70, np.nan], "B2": [600, 700, 800]} + ), ), ( - "outer", - DataFrame( - { - "A": [100, 200, 1, 300], - "B1": [60, 70, 80, np.nan], - "B2": [600, 700, np.nan, 800], - } - ), + "outer", + DataFrame( + { + "A": [100, 200, 1, 300], + "B1": [60, 70, 80, np.nan], + "B2": [600, 700, np.nan, 800], + } + ), ), ], ) @@ -2672,15 +2671,25 @@ def test_merge_different_index_names(): def test_merge_complex_column(): + # GH#46885 class Column: def __init__(self, name): self.name = name + def __eq__(self, other): + return other.name == self.name + + def __hash__(self): + return hash(self.name) + + def __add__(self, other): + return Column(name=f"{self.name}@{other}") + merged_column = Column(name="Z") - left = DataFrame({merged_column: [1], "X": [2]}) - right = DataFrame({merged_column: [1], "Y": [6]}) + left = DataFrame([[1,2]], columns=[merged_column, "X"]) + right = DataFrame([[1,6]], columns=[merged_column, "Y"]) result = merge(left, right, left_index=True, right_index=True) expected = DataFrame( - [[1, 2, 1, 6]], columns=[merged_column, "X", merged_column, "Y"] + [[1, 2, 1, 6]], columns=[Column('Z@_x'), "X", Column('Z@_y'), "Y"] ) tm.assert_frame_equal(result, expected) From 6aa110ed8b8a7ba912c48dee7b1942f84832216a Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Mon, 2 May 2022 20:18:57 +0300 Subject: [PATCH 09/22] fix flake --- pandas/tests/reshape/merge/test_merge.py | 139 ++++++++++++----------- 1 file changed, 70 insertions(+), 69 deletions(-) diff --git 
a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 9b6ee5635cb11..48517c2f0661b 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -652,8 +652,8 @@ def test_merge_nan_right(self): None: {0: 0, 1: 0}, } ) - .set_index(None) - .reset_index()[["i1", "i2", "i1_", "i3"]] + .set_index(None) + .reset_index()[["i1", "i2", "i1_", "i3"]] ) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -731,6 +731,7 @@ def test_other_datetime_unit(self): "datetime64[us]", "datetime64[ns]", ]: + df2 = s.astype(dtype).to_frame("days") # coerces to datetime64[ns], thus should not be affected assert df2["days"].dtype == "datetime64[ns]" @@ -831,9 +832,9 @@ def test_merge_datetime64tz_values(self): { "key": [1, 2, 3], "value_x": list(pd.date_range("20151010", periods=2, tz="US/Eastern")) - + [pd.NaT], + + [pd.NaT], "value_y": [pd.NaT] - + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), + + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), } ) result = merge(left, right, on="key", how="outer") @@ -1296,33 +1297,33 @@ def test_merge_two_empty_df_no_division_error(self): "index,expected_index", [ ( - CategoricalIndex([1, 2, 4]), - CategoricalIndex([1, 2, 4, None, None, None]), + CategoricalIndex([1, 2, 4]), + CategoricalIndex([1, 2, 4, None, None, None]), ), ( - DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"]), - DatetimeIndex( - ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT] - ), + DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"]), + DatetimeIndex( + ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT] + ), ), (Float64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), (Int64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), ( - IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)]), - IntervalIndex.from_tuples( - [(1, 2), (2, 3), (3, 4), np.nan, np.nan, np.nan] - ), + IntervalIndex.from_tuples([(1, 
2), (2, 3), (3, 4)]), + IntervalIndex.from_tuples( + [(1, 2), (2, 3), (3, 4), np.nan, np.nan, np.nan] + ), ), ( - PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"), - PeriodIndex( - ["2001-01-01", "2001-01-02", "2001-01-03", pd.NaT, pd.NaT, pd.NaT], - freq="D", - ), + PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"), + PeriodIndex( + ["2001-01-01", "2001-01-02", "2001-01-03", pd.NaT, pd.NaT, pd.NaT], + freq="D", + ), ), ( - TimedeltaIndex(["1d", "2d", "3d"]), - TimedeltaIndex(["1d", "2d", "3d", pd.NaT, pd.NaT, pd.NaT]), + TimedeltaIndex(["1d", "2d", "3d"]), + TimedeltaIndex(["1d", "2d", "3d", pd.NaT, pd.NaT, pd.NaT]), ), ], ) @@ -1594,14 +1595,14 @@ def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals): ([0.0, 1.0, 2.0], ["0", "1", "2"]), ([0, 1, 2], ["0", "1", "2"]), ( - pd.date_range("1/1/2011", periods=2, freq="D"), - ["2011-01-01", "2011-01-02"], + pd.date_range("1/1/2011", periods=2, freq="D"), + ["2011-01-01", "2011-01-02"], ), (pd.date_range("1/1/2011", periods=2, freq="D"), [0, 1]), (pd.date_range("1/1/2011", periods=2, freq="D"), [0.0, 1.0]), ( - pd.date_range("20130101", periods=3), - pd.date_range("20130101", periods=3, tz="US/Eastern"), + pd.date_range("20130101", periods=3), + pd.date_range("20130101", periods=3, tz="US/Eastern"), ), ], ) @@ -2028,7 +2029,7 @@ def test_dtype_on_categorical_dates(self): ], ) def test_merging_with_bool_or_int_cateorical_column( - self, category_column, categories, expected_categories, ordered + self, category_column, categories, expected_categories, ordered ): # GH 17187 # merging with a boolean/int categorical column @@ -2067,44 +2068,44 @@ class TestMergeOnIndexes: ("inner", False, DataFrame({"a": [20, 10], "b": [200, 100]}, index=[2, 1])), ("inner", True, DataFrame({"a": [10, 20], "b": [100, 200]}, index=[1, 2])), ( - "left", - False, - DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), + "left", + False, + DataFrame({"a": [20, 10, 0], "b": [200, 100, 
np.nan]}, index=[2, 1, 0]), ), ( - "left", - True, - DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), + "left", + True, + DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), ), ( - "right", - False, - DataFrame( - {"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2] - ), + "right", + False, + DataFrame( + {"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2] + ), ), ( - "right", - True, - DataFrame( - {"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3] - ), + "right", + True, + DataFrame( + {"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3] + ), ), ( - "outer", - False, - DataFrame( - {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, - index=[0, 1, 2, 3], - ), + "outer", + False, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), ), ( - "outer", - True, - DataFrame( - {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, - index=[0, 1, 2, 3], - ), + "outer", + True, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), ), ], ) @@ -2228,20 +2229,20 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols): "how,expected", [ ( - "right", - DataFrame( - {"A": [100, 200, 300], "B1": [60, 70, np.nan], "B2": [600, 700, 800]} - ), + "right", + DataFrame( + {"A": [100, 200, 300], "B1": [60, 70, np.nan], "B2": [600, 700, 800]} + ), ), ( - "outer", - DataFrame( - { - "A": [100, 200, 1, 300], - "B1": [60, 70, 80, np.nan], - "B2": [600, 700, np.nan, 800], - } - ), + "outer", + DataFrame( + { + "A": [100, 200, 1, 300], + "B1": [60, 70, 80, np.nan], + "B2": [600, 700, np.nan, 800], + } + ), ), ], ) @@ -2686,10 +2687,10 @@ def __add__(self, other): return Column(name=f"{self.name}@{other}") merged_column = Column(name="Z") - left = DataFrame([[1,2]], columns=[merged_column, "X"]) - right = DataFrame([[1,6]], columns=[merged_column, "Y"]) + left = DataFrame([[1, 2]], 
columns=[merged_column, "X"]) + right = DataFrame([[1, 6]], columns=[merged_column, "Y"]) result = merge(left, right, left_index=True, right_index=True) expected = DataFrame( - [[1, 2, 1, 6]], columns=[Column('Z@_x'), "X", Column('Z@_y'), "Y"] + [[1, 2, 1, 6]], columns=[Column("Z@_x"), "X", Column("Z@_y"), "Y"] ) tm.assert_frame_equal(result, expected) From b58d477decc72c366914dc5d94f19ee12f010361 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Wed, 4 May 2022 14:51:57 +0300 Subject: [PATCH 10/22] move comment to the right section --- doc/source/whatsnew/v1.5.0.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 0bed43c01c6e1..9905e8660982f 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -511,12 +511,6 @@ Strings - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. Now raises ``TypeError`` (:issue:`3485`) - -Merge -^^^^^^^ -- Bug in :meth:`merge` supports user-defined types merge with identical columns -- - - Interval ^^^^^^^^ - Bug in :meth:`IntervalArray.__setitem__` when setting ``np.nan`` into an integer-backed array raising ``ValueError`` instead of ``TypeError`` (:issue:`45484`) @@ -625,6 +619,7 @@ Reshaping - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) +- Bug in :meth:`merge` supports user-defined types merge with identical columns - Sparse From bfe59be5d343fa58017384a6718cb501520b5b46 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Wed, 4 May 2022 18:34:55 +0300 Subject: [PATCH 11/22] allow null suffix --- 
doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/frame.py | 10 +++++----- pandas/core/reshape/merge.py | 12 +++--------- pandas/tests/reshape/merge/test_merge.py | 4 ++++ 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9905e8660982f..5c6d4f8497c7e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -619,7 +619,7 @@ Reshaping - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) -- Bug in :meth:`merge` supports user-defined types merge with identical columns +- Bug in :meth:`merge` allows empty suffixes, keeping the columns as-is - Sparse diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74d061cbb9b7f..380a1c32c549f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -25,6 +25,7 @@ Iterable, Iterator, Literal, + Optional, Sequence, cast, overload, @@ -319,13 +320,12 @@ sort : bool, default False Sort the join keys lexicographically in the result DataFrame. If False, the order of the join keys depends on the join type (how keyword). -suffixes : list-like, default is ("_x", "_y") - A length-2 sequence where each element is optionally a string +suffixes : optional list-like, default is ("_x", "_y") + An optional length-2 sequence where each element is optionally a string indicating the suffix to add to overlapping column names in `left` and `right` respectively. Pass a value of `None` instead of a string to indicate that the column name from `left` or - `right` should be left as-is, with no suffix. At least one of the - values must not be None. 
+ `right` should be left as-is, with no suffix. Pass `None` to keep both columns as-is. copy : bool, default True If False, avoid copy if possible. indicator : bool or str, default False @@ -9620,7 +9620,7 @@ def merge( left_index: bool = False, right_index: bool = False, sort: bool = False, - suffixes: Suffixes = ("_x", "_y"), + suffixes: Optional[Suffixes] = ("_x", "_y"), copy: bool = True, indicator: bool = False, validate: str | None = None, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6c1dcffeaefbb..c4ab5e4920ca9 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2356,7 +2356,7 @@ def _items_overlap_with_suffix( If corresponding suffix is empty, the entry is simply converted to string. """ - if not is_list_like(suffixes, allow_sets=False): + if not (is_list_like(suffixes, allow_sets=False) or suffixes is None): warnings.warn( f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give " "unexpected results. Provide 'suffixes' as a tuple instead. 
In the " @@ -2369,10 +2369,7 @@ def _items_overlap_with_suffix( if len(to_rename) == 0: return left, right - lsuffix, rsuffix = suffixes - - if not lsuffix and not rsuffix: - raise ValueError(f"columns overlap but no suffix specified: {to_rename}") + lsuffix, rsuffix = suffixes if suffixes else (None, None) def renamer(x, suffix): """ @@ -2391,10 +2388,7 @@ def renamer(x, suffix): x : renamed column """ if x in to_rename and suffix is not None: - try: - return x + suffix - except TypeError: - return f"{x}{suffix}" + return f"{x}{suffix}" return x lrenamer = partial(renamer, suffix=lsuffix) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 48517c2f0661b..6e2fd9ccba657 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2209,6 +2209,10 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): (0, 0, {"suffixes": ("_a", None)}, ["0_a", 0]), ("a", "a", {}, ["a_x", "a_y"]), (0, 0, {}, ["0_x", "0_y"]), + (0, 0, {"suffixes": None}, [0, 0]), + (0, 0, {"suffixes": (None, None)}, [0, 0]), + ("a", "a", {"suffixes": None}, ["a", "a"]), + ("a", "a", {"suffixes": (None, None)}, ["a", "a"]) ], ) def test_merge_suffix(col1, col2, kwargs, expected_cols): From 8ec9e56a5a7b0eca169d81166fca3f448bfc9804 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Wed, 4 May 2022 18:44:39 +0300 Subject: [PATCH 12/22] fix pre commit --- pandas/core/frame.py | 6 +++--- pandas/tests/reshape/merge/test_merge.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 380a1c32c549f..41b7fc3077547 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -25,7 +25,6 @@ Iterable, Iterator, Literal, - Optional, Sequence, cast, overload, @@ -325,7 +324,8 @@ indicating the suffix to add to overlapping column names in `left` and `right` respectively. 
Pass a value of `None` instead of a string to indicate that the column name from `left` or - `right` should be left as-is, with no suffix. Pass `None` to keep both columns as-is. + `right` should be left as-is, with no suffix. + Pass `None` to keep both columns as-is. copy : bool, default True If False, avoid copy if possible. indicator : bool or str, default False @@ -9620,7 +9620,7 @@ def merge( left_index: bool = False, right_index: bool = False, sort: bool = False, - suffixes: Optional[Suffixes] = ("_x", "_y"), + suffixes: Suffixes | None = ("_x", "_y"), copy: bool = True, indicator: bool = False, validate: str | None = None, diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 6e2fd9ccba657..927f8632cff11 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2212,7 +2212,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): (0, 0, {"suffixes": None}, [0, 0]), (0, 0, {"suffixes": (None, None)}, [0, 0]), ("a", "a", {"suffixes": None}, ["a", "a"]), - ("a", "a", {"suffixes": (None, None)}, ["a", "a"]) + ("a", "a", {"suffixes": (None, None)}, ["a", "a"]), ], ) def test_merge_suffix(col1, col2, kwargs, expected_cols): From 4112c7206fcb536be5b5e8b4bade29c7dac839f1 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Wed, 4 May 2022 18:54:15 +0300 Subject: [PATCH 13/22] add file --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 41b7fc3077547..3681b7fb03279 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -324,8 +324,8 @@ indicating the suffix to add to overlapping column names in `left` and `right` respectively. Pass a value of `None` instead of a string to indicate that the column name from `left` or - `right` should be left as-is, with no suffix. - Pass `None` to keep both columns as-is. + `right` should be left as-is, with no suffix. 
Pass `None` to keep + both columns as-is. copy : bool, default True If False, avoid copy if possible. indicator : bool or str, default False From 0375b9459616b4948c395e9c12c78058effe1f0b Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Wed, 4 May 2022 19:41:17 +0300 Subject: [PATCH 14/22] fix tests --- pandas/tests/reshape/merge/test_merge.py | 40 ------------------------ 1 file changed, 40 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 927f8632cff11..b2bce22931643 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2259,21 +2259,6 @@ def test_merge_duplicate_suffix(how, expected): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "col1, col2, suffixes", - [("a", "a", (None, None)), ("a", "a", ("", None)), (0, 0, (None, ""))], -) -def test_merge_suffix_error(col1, col2, suffixes): - # issue: 24782 - a = DataFrame({col1: [1, 2, 3]}) - b = DataFrame({col2: [3, 4, 5]}) - - # TODO: might reconsider current raise behaviour, see issue 24782 - msg = "columns overlap but no suffix specified" - with pytest.raises(ValueError, match=msg): - merge(a, b, left_index=True, right_index=True, suffixes=suffixes) - - @pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}]) def test_merge_suffix_warns(suffixes): a = DataFrame({"a": [1, 2, 3]}) @@ -2673,28 +2658,3 @@ def test_merge_different_index_names(): result = merge(left, right, left_on="c", right_on="d") expected = DataFrame({"a_x": [1], "a_y": 1}) tm.assert_frame_equal(result, expected) - - -def test_merge_complex_column(): - # GH#46885 - class Column: - def __init__(self, name): - self.name = name - - def __eq__(self, other): - return other.name == self.name - - def __hash__(self): - return hash(self.name) - - def __add__(self, other): - return Column(name=f"{self.name}@{other}") - - merged_column = Column(name="Z") - left = DataFrame([[1, 2]], 
columns=[merged_column, "X"]) - right = DataFrame([[1, 6]], columns=[merged_column, "Y"]) - result = merge(left, right, left_index=True, right_index=True) - expected = DataFrame( - [[1, 2, 1, 6]], columns=[Column("Z@_x"), "X", Column("Z@_y"), "Y"] - ) - tm.assert_frame_equal(result, expected) From c8e956d3f8d365a8ebc713b7f34e3cc9a523d922 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Sat, 7 May 2022 12:12:15 +0300 Subject: [PATCH 15/22] fix tests assumes suppix is must --- pandas/tests/frame/methods/test_join.py | 6 ------ pandas/tests/reshape/merge/test_multi.py | 6 ------ 2 files changed, 12 deletions(-) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 597e95c8a2289..3a114a884632a 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -138,12 +138,6 @@ def test_join_index(float_frame): with pytest.raises(ValueError, match="join method"): f.join(f2, how="foo") - # corner case - overlapping columns - msg = "columns overlap but no suffix" - for how in ("outer", "left", "inner"): - with pytest.raises(ValueError, match=msg): - float_frame.join(float_frame, how=how) - def test_join_index_more(float_frame): af = float_frame.loc[:, ["A", "B"]] diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 0dbe45eeb1e82..8b2a43e6512e1 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -636,12 +636,6 @@ def test_join_multi_levels_invalid(self, portfolio, household): ): household.join(portfolio, how="inner") - portfolio2 = portfolio.copy() - portfolio2.index.set_names(["household_id", "foo"]) - - with pytest.raises(ValueError, match="columns overlap but no suffix specified"): - portfolio2.join(portfolio, how="inner") - def test_join_multi_levels2(self): # some more advanced merges From 989aefa6af55b94bd443af9b819dba3772a93572 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Sat, 7 
May 2022 12:34:14 +0300 Subject: [PATCH 16/22] fix docstringg --- pandas/core/frame.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3681b7fb03279..b153df054b003 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -409,14 +409,17 @@ 4 bar 2 bar 6 5 baz 3 baz 7 -Merge DataFrames df1 and df2, but raise an exception if the DataFrames have -any overlapping columns. - ->>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) -Traceback (most recent call last): -... -ValueError: columns overlap but no suffix specified: - Index(['value'], dtype='object') +Merge DataFrames df1 and df2 with None as suffixes to keep +the original column names. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=None) + lkey value rkey value +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 >>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]}) >>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) From f1792a6f052484333cbc35da55f56c44795a5207 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Sat, 7 May 2022 14:03:13 +0300 Subject: [PATCH 17/22] fix typing --- pandas/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 1debc4265508f..8cb94195bdae1 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -111,7 +111,7 @@ IndexLabel = Union[Hashable, Sequence[Hashable]] Level = Union[Hashable, int] Shape = Tuple[int, ...] 
-Suffixes = Tuple[Optional[str], Optional[str]] +Suffixes = Optional[Tuple[Optional[str], Optional[str]]] Ordered = Optional[bool] JSONSerializable = Optional[Union[PythonScalar, List, Dict]] Frequency = Union[str, "DateOffset"] From c8224f9ff5dfff567ed58bacec5f7d897ace8a46 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Tue, 10 May 2022 16:56:42 +0300 Subject: [PATCH 18/22] add issue to comment --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 5c6d4f8497c7e..21f2687638d35 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -619,7 +619,7 @@ Reshaping - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`) - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) -- Bug in :meth:`merge` allows empty suffixes, keeping the columns as-is +- Bug in :meth:`merge` allows pass None as suffix, keeping the columns as-is (:issue:`46885`) - Sparse From 5c1dacb38402e97cfd167746d4d60392f8775763 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Sat, 14 May 2022 14:32:52 +0300 Subject: [PATCH 19/22] fix comments --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/frame.py | 4 ++-- pandas/core/reshape/merge.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 21f2687638d35..86e67b166c1b7 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -619,7 +619,7 @@ Reshaping - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes 
(:issue:`46379`) - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) -- Bug in :meth:`merge` allows pass None as suffix, keeping the columns as-is (:issue:`46885`) +- Bug in :func:`merge` and :meth:`DataFrame.merge` now allows passing ``None`` or ``(None, None)`` for ``suffixes`` argument, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) - Sparse diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b153df054b003..14601a81dc110 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -325,7 +325,7 @@ `left` and `right` respectively. Pass a value of `None` instead of a string to indicate that the column name from `left` or `right` should be left as-is, with no suffix. Pass `None` to keep - both columns as-is. + both columns labels as-is. copy : bool, default True If False, avoid copy if possible. 
indicator : bool or str, default False @@ -9623,7 +9623,7 @@ def merge( left_index: bool = False, right_index: bool = False, sort: bool = False, - suffixes: Suffixes | None = ("_x", "_y"), + suffixes: Suffixes = ("_x", "_y"), copy: bool = True, indicator: bool = False, validate: str | None = None, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c4ab5e4920ca9..6fa5fe253654f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2380,12 +2380,12 @@ def renamer(x, suffix): Parameters ---------- - x : original column + x : original column label suffix : str or None Returns ------- - x : renamed column + x : renamed column label """ if x in to_rename and suffix is not None: return f"{x}{suffix}" From edef8fe6022dabb356898c5cf46da63af7f8d785 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Fri, 20 May 2022 11:52:43 +0300 Subject: [PATCH 20/22] add df.join allows empty string --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index fdb9eb9138278..a5c8012e57892 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -746,6 +746,7 @@ Reshaping - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) - Bug in :func:`merge` and :meth:`DataFrame.merge` now allows passing ``None`` or ``(None, None)`` for ``suffixes`` argument, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) +- Bug in :func:`DataFrame.join` now allows passing ``None`` or empty string for ``lsuffix`` and ``rsuffix`` arguments, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) - Sparse From 
ce72b94f4d0c58844f61eb0f5fa0a680a06d9e3f Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Fri, 20 May 2022 11:53:49 +0300 Subject: [PATCH 21/22] none isn't aloweed --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a5c8012e57892..a0068be43dd8e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -746,7 +746,7 @@ Reshaping - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) - Bug in :func:`merge` and :meth:`DataFrame.merge` now allows passing ``None`` or ``(None, None)`` for ``suffixes`` argument, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) -- Bug in :func:`DataFrame.join` now allows passing ``None`` or empty string for ``lsuffix`` and ``rsuffix`` arguments, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) +- Bug in :func:`DataFrame.join` now allows passing empty string for ``lsuffix`` and ``rsuffix`` arguments, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) - Sparse From f62a084060d2454d62d041ce3dd212b1eb510188 Mon Sep 17 00:00:00 2001 From: Eric Feldman Date: Fri, 20 May 2022 18:33:00 +0300 Subject: [PATCH 22/22] move to enhancements --- doc/source/whatsnew/v1.5.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 61d52e2f0379a..024e0d54f0745 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -151,6 +151,8 @@ Other enhancements - A :class:`errors.PerformanceWarning` is now 
thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`) +- :func:`merge` and :meth:`DataFrame.merge` now allow passing ``None`` or ``(None, None)`` for the ``suffixes`` argument, keeping column labels unchanged in the resulting :class:`DataFrame`, potentially with duplicate column labels (:issue:`46885`) +- :meth:`DataFrame.join` now allows passing an empty string for the ``lsuffix`` and ``rsuffix`` arguments, keeping column labels unchanged in the resulting :class:`DataFrame`, potentially with duplicate column labels (:issue:`46885`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: @@ -796,8 +798,6 @@ Reshaping - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`) - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`) - Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`) -- Bug in :func:`merge` and :meth:`DataFrame.merge` now allows passing ``None`` or ``(None, None)`` for ``suffixes`` argument, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) -- Bug in :func:`DataFrame.join` now allows passing empty string for ``lsuffix`` and ``rsuffix`` arguments, keeping column labels unchanged in the resulting :class:`DataFrame` potentially with duplicate column labels (:issue:`46885`) - Sparse