Skip to content

BUG: concat not copying index and columns when copy=True #31119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 21, 2020
Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ Reshaping
- Bug in :meth:`DataFrame.pivot_table` when ``margins`` is ``True`` and only ``columns`` is defined (:issue:`31016`)
- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
- Bug in :func:`crosstab` where, when the inputs are two Series with tuple names, the output keeps a dummy MultiIndex as columns. (:issue:`18321`)

- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)

Sparse
^^^^^^
Expand Down
18 changes: 15 additions & 3 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@


def get_objs_combined_axis(
objs, intersect: bool = False, axis=0, sort: bool = True
objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False
) -> Index:
"""
Extract combined index: return intersection or union (depending on the
Expand All @@ -81,13 +81,15 @@ def get_objs_combined_axis(
The axis to extract indexes from.
sort : bool, default True
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.

Returns
-------
Index
"""
obs_idxes = [obj._get_axis(axis) for obj in objs]
return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)


def _get_distinct_objs(objs: List[Index]) -> List[Index]:
Expand All @@ -105,7 +107,10 @@ def _get_distinct_objs(objs: List[Index]) -> List[Index]:


def _get_combined_index(
indexes: List[Index], intersect: bool = False, sort: bool = False
indexes: List[Index],
intersect: bool = False,
sort: bool = False,
copy: bool = False,
) -> Index:
"""
Return the union or intersection of indexes.
Expand All @@ -119,6 +124,8 @@ def _get_combined_index(
calculate the union.
sort : bool, default False
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.

Returns
-------
Expand All @@ -143,6 +150,11 @@ def _get_combined_index(
index = index.sort_values()
except TypeError:
pass

# GH 29879
if copy:
index = index.copy()

return index


Expand Down
6 changes: 5 additions & 1 deletion pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,11 @@ def _get_new_axes(self) -> List[Index]:
def _get_comb_axis(self, i: int) -> Index:
data_axis = self.objs[0]._get_block_manager_axis(i)
return get_objs_combined_axis(
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
self.objs,
axis=data_axis,
intersect=self.intersect,
sort=self.sort,
copy=self.copy,
)

def _get_concat_axis(self) -> Index:
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2750,3 +2750,17 @@ def test_concat_sparse():
)
result = pd.concat([a, a], axis=1)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("test_series", [True, False])
def test_concat_copy_index(test_series, axis):
    # GH 29879
    # With copy=True the concatenated result must not share its axis
    # objects (by identity) with the inputs.
    # NOTE(review): ``axis`` is presumably a pytest fixture defined
    # outside this view -- confirm against the test suite's conftest.
    if not test_series:
        # DataFrame inputs: both the row index and the columns are checked.
        frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
        result = concat([frame, frame], axis=axis, copy=True)
        assert result.index is not frame.index
        assert result.columns is not frame.columns
        return

    # Series inputs: only the index is checked.
    series = Series([1, 2])
    result = concat([series, series], axis=axis, copy=True)
    assert result.index is not series.index