
Commit be0926b

Merge remote-tracking branch 'upstream/master' into series_rolling_count_ignores_min_periods
2 parents 1a90629 + 7ffcf9d commit be0926b

8 files changed: 47 additions, 5,578 deletions

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 1 deletion
@@ -156,7 +156,7 @@ Reshaping
 - Bug in :meth:`DataFrame.pivot_table` when ``margin`` is ``True`` and only ``column`` is defined (:issue:`31016`)
 - Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
 - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
-
+- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
 
 Sparse
 ^^^^^^
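The whatsnew entry above is the user-visible effect of this change set. A minimal sketch of the fixed behavior (the data values are illustrative, not taken from the commit):

import pandas as pd

ser = pd.Series([1, 2])
# With axis=1, ser.index is the combined (non-concatenation) axis; after the
# fix, copy=True hands back a copy of it rather than the original Index object.
result = pd.concat([ser, ser], axis=1, copy=True)
assert result.index is not ser.index  # GH 29879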

pandas/core/indexes/api.py

Lines changed: 15 additions & 3 deletions
@@ -63,7 +63,7 @@
 
 
 def get_objs_combined_axis(
-    objs, intersect: bool = False, axis=0, sort: bool = True
+    objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False
 ) -> Index:
     """
     Extract combined index: return intersection or union (depending on the
@@ -81,13 +81,15 @@ def get_objs_combined_axis(
         The axis to extract indexes from.
     sort : bool, default True
         Whether the result index should come out sorted or not.
+    copy : bool, default False
+        If True, return a copy of the combined index.
 
     Returns
     -------
     Index
     """
     obs_idxes = [obj._get_axis(axis) for obj in objs]
-    return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
+    return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)
 
 
 def _get_distinct_objs(objs: List[Index]) -> List[Index]:
@@ -105,7 +107,10 @@ def _get_distinct_objs(objs: List[Index]) -> List[Index]:
 
 
 def _get_combined_index(
-    indexes: List[Index], intersect: bool = False, sort: bool = False
+    indexes: List[Index],
+    intersect: bool = False,
+    sort: bool = False,
+    copy: bool = False,
 ) -> Index:
     """
     Return the union or intersection of indexes.
@@ -119,6 +124,8 @@
         calculate the union.
     sort : bool, default False
         Whether the result index should come out sorted or not.
+    copy : bool, default False
+        If True, return a copy of the combined index.
 
     Returns
     -------
@@ -143,6 +150,11 @@
             index = index.sort_values()
         except TypeError:
             pass
+
+    # GH 29879
+    if copy:
+        index = index.copy()
+
     return index
 
 
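The copy handling added above lives in internal helpers, but its effect can be seen directly with the signatures shown in this diff; a small sketch (the example index is made up for illustration):

import pandas as pd
from pandas.core.indexes.api import _get_combined_index

idx = pd.Index([1, 2, 3])
# Identical inputs collapse to one distinct index; without copy=True that very
# Index object would be returned, with copy=True a fresh copy comes back.
combined = _get_combined_index([idx, idx], copy=True)
assert combined is not idx
assert combined.equals(idx)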

pandas/core/reshape/concat.py

Lines changed: 5 additions & 1 deletion
@@ -517,7 +517,11 @@ def _get_new_axes(self) -> List[Index]:
     def _get_comb_axis(self, i: int) -> Index:
         data_axis = self.objs[0]._get_block_manager_axis(i)
         return get_objs_combined_axis(
-            self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
+            self.objs,
+            axis=data_axis,
+            intersect=self.intersect,
+            sort=self.sort,
+            copy=self.copy,
         )
 
     def _get_concat_axis(self) -> Index:
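Passing copy=self.copy through _get_comb_axis is what propagates the flag from the public concat call down to the index helpers. A short illustrative check for the DataFrame case (frame contents chosen arbitrarily):

import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
# Concatenating along axis=0 makes df.columns the combined axis; with
# copy=True it now comes back as a fresh Index rather than df.columns itself.
result = pd.concat([df, df], copy=True)
assert result.columns is not df.columns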

pandas/tests/io/data/html/computer_sales_page.html

Lines changed: 0 additions & 619 deletions
This file was deleted.

pandas/tests/io/data/html/macau.html

Lines changed: 0 additions & 3691 deletions
This file was deleted.

pandas/tests/io/data/html/nyse_wsj.html

Lines changed: 0 additions & 1207 deletions
This file was deleted.

pandas/tests/io/test_html.py

Lines changed: 12 additions & 56 deletions
@@ -14,7 +14,7 @@
 from pandas.errors import ParserError
 import pandas.util._test_decorators as td
 
-from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv
+from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, read_csv
 import pandas._testing as tm
 
 from pandas.io.common import file_path_to_url
@@ -373,32 +373,6 @@ def test_python_docs_table(self):
         zz = [df.iloc[0, 0][0:4] for df in dfs]
         assert sorted(zz) == sorted(["Repo", "What"])
 
-    @pytest.mark.slow
-    def test_thousands_macau_stats(self, datapath):
-        all_non_nan_table_index = -2
-        macau_data = datapath("io", "data", "html", "macau.html")
-        dfs = self.read_html(macau_data, index_col=0, attrs={"class": "style1"})
-        df = dfs[all_non_nan_table_index]
-
-        assert not any(s.isna().any() for _, s in df.items())
-
-    @pytest.mark.slow
-    def test_thousands_macau_index_col(self, datapath, request):
-        # https://github.com/pandas-dev/pandas/issues/29622
-        # This tests fails for bs4 >= 4.8.0 - so handle xfail accordingly
-        if self.read_html.keywords.get("flavor") == "bs4" and td.safe_import(
-            "bs4", "4.8.0"
-        ):
-            reason = "fails for bs4 version >= 4.8.0"
-            request.node.add_marker(pytest.mark.xfail(reason=reason))
-
-        all_non_nan_table_index = -2
-        macau_data = datapath("io", "data", "html", "macau.html")
-        dfs = self.read_html(macau_data, index_col=0, header=0)
-        df = dfs[all_non_nan_table_index]
-
-        assert not any(s.isna().any() for _, s in df.items())
-
     def test_empty_tables(self):
         """
         Make sure that read_html ignores empty tables.
@@ -571,23 +545,6 @@ def test_parse_header_of_non_string_column(self):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_nyse_wsj_commas_table(self, datapath):
-        data = datapath("io", "data", "html", "nyse_wsj.html")
-        df = self.read_html(data, index_col=0, header=0, attrs={"class": "mdcTable"})[0]
-
-        expected = Index(
-            [
-                "Issue(Roll over for charts and headlines)",
-                "Volume",
-                "Price",
-                "Chg",
-                "% Chg",
-            ]
-        )
-        nrows = 100
-        assert df.shape[0] == nrows
-        tm.assert_index_equal(df.columns, expected)
-
     @pytest.mark.slow
     def test_banklist_header(self, datapath):
         from pandas.io.html import _remove_whitespace
@@ -894,24 +851,23 @@ def test_parse_dates_combine(self):
         newdf = DataFrame({"datetime": raw_dates})
         tm.assert_frame_equal(newdf, res[0])
 
-    def test_computer_sales_page(self, datapath):
-        data = datapath("io", "data", "html", "computer_sales_page.html")
-        msg = (
-            r"Passed header=\[0,1\] are too many "
-            r"rows for this multi_index of columns"
-        )
-        with pytest.raises(ParserError, match=msg):
-            self.read_html(data, header=[0, 1])
-
-        data = datapath("io", "data", "html", "computer_sales_page.html")
-        assert self.read_html(data, header=[1, 2])
-
     def test_wikipedia_states_table(self, datapath):
         data = datapath("io", "data", "html", "wikipedia_states.html")
         assert os.path.isfile(data), f"{repr(data)} is not a file"
         assert os.path.getsize(data), f"{repr(data)} is an empty file"
         result = self.read_html(data, "Arizona", header=1)[0]
+        assert result.shape == (60, 12)
+        assert "Unnamed" in result.columns[-1]
         assert result["sq mi"].dtype == np.dtype("float64")
+        assert np.allclose(result.loc[0, "sq mi"], 665384.04)
+
+    def test_wikipedia_states_multiindex(self, datapath):
+        data = datapath("io", "data", "html", "wikipedia_states.html")
+        result = self.read_html(data, "Arizona", index_col=0)[0]
+        assert result.shape == (60, 11)
+        assert "Unnamed" in result.columns[-1][1]
+        assert result.columns.nlevels == 2
+        assert np.allclose(result.loc["Alaska", ("Total area[2]", "sq mi")], 665384.04)
 
     def test_parser_error_on_empty_header_row(self):
         msg = (
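One of the new assertions above (result.columns.nlevels == 2) relies on read_html building a column MultiIndex when a table carries more than one header row. A self-contained sketch of that behavior (the tiny HTML snippet is made up for illustration and is not part of the test data):

from io import StringIO

import pandas as pd

html = """
<table>
  <thead>
    <tr><th>Group</th><th>Area</th></tr>
    <tr><th>State</th><th>sq mi</th></tr>
  </thead>
  <tbody>
    <tr><td>Alaska</td><td>665384.04</td></tr>
  </tbody>
</table>
"""
# Two header rows yield two column levels; index_col=0 mirrors the new test.
df = pd.read_html(StringIO(html), index_col=0)[0]
assert df.columns.nlevels == 2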

pandas/tests/reshape/test_concat.py

Lines changed: 14 additions & 0 deletions
@@ -2750,3 +2750,17 @@ def test_concat_sparse():
     )
     result = pd.concat([a, a], axis=1)
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("test_series", [True, False])
+def test_concat_copy_index(test_series, axis):
+    # GH 29879
+    if test_series:
+        ser = Series([1, 2])
+        comb = concat([ser, ser], axis=axis, copy=True)
+        assert comb.index is not ser.index
+    else:
+        df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
+        comb = concat([df, df], axis=axis, copy=True)
+        assert comb.index is not df.index
+        assert comb.columns is not df.columns
