From 4dcc7d9d795c4977f06a7b3c3f97ee8eacc3836f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Jan 2021 10:18:40 +0100 Subject: [PATCH 1/2] Revert "BUG: casting on concat with empties (#38907)" This reverts commit 04282c7adbcc113470a08cc895c3858f123d4168. --- doc/source/whatsnew/v1.3.0.rst | 3 +++ pandas/core/internals/concat.py | 6 ------ pandas/tests/indexing/test_partial.py | 3 +-- pandas/tests/reshape/concat/test_append.py | 6 +++++- pandas/tests/reshape/concat/test_empty.py | 1 + 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9e557a0020f1e..6332d5db54553 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -314,6 +314,9 @@ Reshaping - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`) - Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`) - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`) +- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) +- + Sparse ^^^^^^ diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 5e587dd9f9472..f97077954f8bf 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -318,12 +318,6 @@ def _concatenate_join_units( # Concatenating join units along ax0 is handled in _merge_blocks. raise AssertionError("Concatenating join units along axis0") - nonempties = [ - x for x in join_units if x.block is None or x.block.shape[concat_axis] > 0 - ] - if nonempties: - join_units = nonempties - empty_dtype, upcasted_na = _get_empty_dtype_and_na(join_units) to_concat = [ diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index f2d628c70ae62..d8dd08ea13341 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -154,8 +154,7 @@ def test_partial_setting_mixed_dtype(self): # columns will align df = DataFrame(columns=["A", "B"]) df.loc[0] = Series(1, index=range(4)) - expected = DataFrame(columns=["A", "B"], index=[0], dtype=int) - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0])) # columns will align df = DataFrame(columns=["A", "B"]) diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 1a895aee98f0a..ffeda703cd890 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -82,7 +82,6 @@ def test_append_length0_frame(self, sort): df5 = df.append(df3, sort=sort) expected = DataFrame(index=[0, 1], columns=["A", "B", "C"]) - expected["C"] = expected["C"].astype(np.float64) tm.assert_frame_equal(df5, expected) def test_append_records(self): @@ -341,11 +340,16 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): expected = DataFrame( [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"] ) + # These columns get cast to object after append + expected["c"] = expected["c"].astype(object) + expected["d"] = expected["d"].astype(object) tm.assert_frame_equal(result_a, expected) expected = DataFrame( [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"] ) + expected["c"] = expected["c"].astype(object) + expected["d"] = expected["d"].astype(object) result_b = result_a.append(s, ignore_index=True) tm.assert_frame_equal(result_b, expected) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 075785120677a..dea04e98088e8 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -210,6 +210,7 @@ def test_concat_empty_df_object_dtype(self, dtype): df_2 = DataFrame(columns=df_1.columns) result = pd.concat([df_1, df_2], axis=0) expected = df_1.copy() + expected["EmptyCol"] = expected["EmptyCol"].astype(object) # TODO: why? tm.assert_frame_equal(result, expected) def test_concat_empty_dataframe_dtypes(self): From c209b59dcd313acacce19b90c51e802dbbb3bfd4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Jan 2021 10:19:43 +0100 Subject: [PATCH 2/2] Revert "BUG: inconsistent concat casting EA vs non-EA (#38843)" This reverts commit 2362df9adb6fde9c73c1e2572871b8fd3714eb31. --- doc/source/whatsnew/v1.3.0.rst | 3 --- pandas/core/dtypes/concat.py | 2 +- pandas/tests/indexing/test_partial.py | 16 +++------------- pandas/tests/reshape/concat/test_concat.py | 7 +++---- pandas/tests/reshape/concat/test_empty.py | 7 ++----- 5 files changed, 9 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 6332d5db54553..c2372b0e6b9a1 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -312,12 +312,9 @@ Reshaping - Bug in :func:`merge` raising error when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) - Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`) -- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`) - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`) -- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) - - Sparse ^^^^^^ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d768f83e4d36b..624e71a5cf760 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -127,7 +127,7 @@ def is_nonempty(x) -> bool: # marginal given that it would still require shape & dtype calculation and # np.concatenate which has them both implemented is compiled. non_empties = [x for x in to_concat if is_nonempty(x)] - if non_empties: + if non_empties and axis == 0: to_concat = non_empties typs = _get_dtype_kinds(to_concat) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index d8dd08ea13341..0251fb4a0ebd6 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -170,21 +170,11 @@ def test_partial_setting_mixed_dtype(self): with pytest.raises(ValueError, match=msg): df.loc[0] = [1, 2, 3] - @pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) - def test_loc_setitem_expanding_empty(self, dtype): + # TODO: #15657, these are left as object and not coerced df = DataFrame(columns=["A", "B"]) + df.loc[3] = [6, 7] - value = [6, 7] - if dtype == "int64": - value = np.array(value, dtype=dtype) - elif dtype == "Int64": - value = pd.array(value, dtype=dtype) - - df.loc[3] = value - - exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype) - if dtype is not None: - exp = exp.astype(dtype) + exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object") tm.assert_frame_equal(df, exp) def test_series_partial_set(self): diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 4750f9b0c40a3..16c4e9456aa05 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -474,12 +474,11 @@ def test_concat_will_upcast(dt, pdt): assert x.values.dtype == "float64" -@pytest.mark.parametrize("dtype", ["int64", "Int64"]) -def test_concat_empty_and_non_empty_frame_regression(dtype): +def test_concat_empty_and_non_empty_frame_regression(): # GH 18178 regression test - df1 = DataFrame({"foo": [1]}).astype(dtype) + df1 = DataFrame({"foo": [1]}) df2 = DataFrame({"foo": []}) - expected = df1 + expected = DataFrame({"foo": [1.0]}) result = pd.concat([df1, df2]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index dea04e98088e8..a97e9265b4f99 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -202,15 +202,12 @@ def test_concat_empty_series_dtypes_sparse(self): expected = pd.SparseDtype("object") assert result.dtype == expected - @pytest.mark.parametrize("dtype", ["int64", "Int64"]) - def test_concat_empty_df_object_dtype(self, dtype): + def test_concat_empty_df_object_dtype(self): # GH 9149 df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) - df_1["Row"] = df_1["Row"].astype(dtype) df_2 = DataFrame(columns=df_1.columns) result = pd.concat([df_1, df_2], axis=0) - expected = df_1.copy() - expected["EmptyCol"] = expected["EmptyCol"].astype(object) # TODO: why? + expected = df_1.astype(object) tm.assert_frame_equal(result, expected) def test_concat_empty_dataframe_dtypes(self):