diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index b2a1e10469a0f..66edd6216e83e 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -80,6 +80,8 @@ Other enhancements - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). +- ``pd.merge`` now allows duplicate column names as long as they are not among the columns being merged on (:issue:`10639`). + .. _whatsnew_0170.api: .. _whatsnew_0170.api_breaking: diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index c7c578232cd0f..430828a3db31b 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -402,19 +402,14 @@ def _validate_specification(self): if self.left_on is None: raise MergeError('Must pass left_on or left_index=True') else: - if not self.left.columns.is_unique: - raise MergeError("Left data columns not unique: %s" - % repr(self.left.columns)) - - if not self.right.columns.is_unique: - raise MergeError("Right data columns not unique: %s" - % repr(self.right.columns)) - # use the common columns common_cols = self.left.columns.intersection( self.right.columns) if len(common_cols) == 0: raise MergeError('No common columns to perform merge on') + if not common_cols.is_unique: + raise MergeError("Data columns not unique: %s" + % repr(common_cols)) self.left_on = self.right_on = common_cols elif self.on is not None: if self.left_on is not None or self.right_on is not None: diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index d357182a60b1f..cd3581273b74d 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -843,7 +843,6 @@ def test_join_append_timedeltas(self): assert_frame_equal(result, expected) def test_overlapping_columns_error_message(self): - # #2649 df = DataFrame({'key': [1, 2, 3], 'v1': [4, 5, 6], 'v2': [7, 8, 9]}) @@ -853,7 +852,16 @@ def test_overlapping_columns_error_message(self): df.columns = ['key', 
'foo', 'foo'] df2.columns = ['key', 'bar', 'bar'] + expected = DataFrame({'key': [1, 2, 3], + 'v1': [4, 5, 6], + 'v2': [7, 8, 9], + 'v3': [4, 5, 6], + 'v4': [7, 8, 9]}) + expected.columns = ['key', 'foo', 'foo', 'bar', 'bar'] + assert_frame_equal(merge(df, df2), expected) + # #2649 + df2.columns = ['key1', 'foo', 'foo'] self.assertRaises(ValueError, merge, df, df2) def _check_merge(x, y):