Skip to content

Commit 82c52a4

Browse files
one in all
1 parent bb43726 commit 82c52a4

File tree

4 files changed

+73
-14
lines changed

4 files changed

+73
-14
lines changed

doc/source/whatsnew/v0.24.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ Fixed Regressions
7272

7373
**Other**
7474

75-
- Fixed AttributeError when printing a DataFrame's HTML repr after accessing the IPython config object (:issue:`25036`)
75+
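- Fixed bug in :func:`pandas.merge` where a ``None`` entry in ``suffixes`` was appended to overlapping column names as the string ``"None"`` instead of leaving those names unchanged (:issue:`24782`).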
7676

7777
.. _whatsnew_0.241.contributors:
7878

pandas/core/internals/managers.py

+21-10
Original file line numberDiff line numberDiff line change
@@ -1967,20 +1967,31 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
19671967
if len(to_rename) == 0:
19681968
return left, right
19691969
else:
1970-
if not lsuffix and not rsuffix:
1971-
raise ValueError('columns overlap but no suffix specified: '
1972-
'{rename}'.format(rename=to_rename))
1970+
# if the column names are strings, raise an error when both suffixes are
1971+
# falsy, i.e. any combination of empty string and None
1972+
if isinstance(to_rename[0], str):
1973+
if not lsuffix and not rsuffix:
1974+
raise ValueError('columns overlap but no suffix specified: '
1975+
'{rename}'.format(rename=to_rename))
1976+
else:
1977+
# otherwise, only the suffix pair (None, None) raises an error
1978+
if lsuffix is None and rsuffix is None:
1979+
raise ValueError('columns overlap but no suffix specified: '
1980+
'{rename}'.format(rename=to_rename))
19731981

1974-
def lrenamer(x):
1975-
if x in to_rename:
1976-
return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix)
1977-
return x
1982+
def renamer(x, suffix):
1983+
"""Rename the left and right indices.
19781984
1979-
def rrenamer(x):
1980-
if x in to_rename:
1981-
return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix)
1985+
If there is overlap, and suffix is not None, add
1986+
suffix, otherwise, leave it as-is.
1987+
"""
1988+
if x in to_rename and suffix is not None:
1989+
return '{x}{suffix}'.format(x=x, suffix=suffix)
19821990
return x
19831991

1992+
lrenamer = partial(renamer, suffix=lsuffix)
1993+
rrenamer = partial(renamer, suffix=rsuffix)
1994+
19841995
return (_transform_index(left, lrenamer),
19851996
_transform_index(right, rrenamer))
19861997

pandas/core/reshape/merge.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,13 @@ def merge_ordered(left, right, on=None,
159159
left DataFrame
160160
fill_method : {'ffill', None}, default None
161161
Interpolation method for data
162-
suffixes : 2-length sequence (tuple, list, ...)
163-
Suffix to apply to overlapping column names in the left and right
164-
side, respectively
162+
suffixes : Sequence, default is ("_x", "_y")
163+
A length-2 sequence where each element is optionally a string
164+
indicating the suffix to add to overlapping column names in
165+
`left` and `right` respectively. Pass a value of `None` instead
166+
of a string to indicate that the column name from `left` or
167+
`right` should be left as-is, with no suffix. At least one of the
168+
values must not be None.
165169
how : {'left', 'right', 'outer', 'inner'}, default 'outer'
166170
* left: use only keys from left frame (SQL: left outer join)
167171
* right: use only keys from right frame (SQL: right outer join)

pandas/tests/reshape/merge/test_merge.py

+44
Original file line numberDiff line numberDiff line change
@@ -1526,3 +1526,47 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
15261526
with pytest.raises(ValueError, match=msg):
15271527
result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
15281528
left_index=left_index, right_index=right_index)
1529+
1530+
1531+
@pytest.mark.parametrize("col1, col2, kwargs, expected_cols", [
1532+
(0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]),
1533+
(0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]),
1534+
(0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]),
1535+
("a", 0, dict(suffixes=(None, "_y")), ["a", 0]),
1536+
(0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]),
1537+
("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]),
1538+
("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]),
1539+
("a", "b", dict(suffixes=("_x", None)), ["a", "b"]),
1540+
("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]),
1541+
(0, 0, dict(suffixes=(["_a", None])), ["0_a", 0]),
1542+
(0, 0, dict(suffixes=('', None)), ["0", 0]),
1543+
("a", "a", dict(), ["a_x", "a_y"]),
1544+
(0, 0, dict(), ["0_x", "0_y"])
1545+
])
1546+
def test_merge_suffix(col1, col2, kwargs, expected_cols):
1547+
# issue: 24782
1548+
a = pd.DataFrame({col1: [1, 2, 3]})
1549+
b = pd.DataFrame({col2: [4, 5, 6]})
1550+
1551+
expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]],
1552+
columns=expected_cols)
1553+
1554+
result = a.merge(b, left_index=True, right_index=True, **kwargs)
1555+
tm.assert_frame_equal(result, expected)
1556+
1557+
1558+
@pytest.mark.parametrize("col, suffixes", [
1559+
('a', (None, None)),
1560+
('a', [None, None]),
1561+
('a', ('', None)),
1562+
('a', [None, '']),
1563+
(0, (None, None))
1564+
])
1565+
def test_merge_suffix_errors(col, suffixes):
1566+
# issue: 24782
1567+
a = pd.DataFrame({col: [1, 2, 3]})
1568+
b = pd.DataFrame({col: [4, 5, 6]})
1569+
1570+
with pytest.raises(ValueError,
1571+
match="columns overlap but no suffix specified"):
1572+
a.merge(b, left_index=True, right_index=True, suffixes=suffixes)

0 commit comments

Comments
 (0)