From eb0058924b0d2e56daa99e2905b1c67c0e1bc10a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 19:00:10 -0800 Subject: [PATCH 1/5] BUG: inconsistent concat casting EA vs non-EA --- pandas/core/dtypes/concat.py | 2 +- pandas/tests/indexing/test_partial.py | 15 ++++++++++++--- pandas/tests/reshape/concat/test_concat.py | 7 ++++--- pandas/tests/reshape/concat/test_empty.py | 7 +++++-- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index a9355e30cd3c2..a1562b26c4fd8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -128,7 +128,7 @@ def is_nonempty(x) -> bool: # marginal given that it would still require shape & dtype calculation and # np.concatenate which has them both implemented is compiled. non_empties = [x for x in to_concat if is_nonempty(x)] - if non_empties and axis == 0: + if non_empties: to_concat = non_empties typs = _get_dtype_kinds(to_concat) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 0251fb4a0ebd6..39bc2476a2cf5 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -170,11 +170,20 @@ def test_partial_setting_mixed_dtype(self): with pytest.raises(ValueError, match=msg): df.loc[0] = [1, 2, 3] - # TODO: #15657, these are left as object and not coerced + @pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) + def test_loc_setitem_expanding_empty(self, dtype): df = DataFrame(columns=["A", "B"]) - df.loc[3] = [6, 7] - exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object") + value = [6, 7] + if dtype == "int64": + value = np.array(value, dtype=dtype) + elif dtype == "Int64": + value = pd.array(value, dtype=dtype) + + df.loc[3] = value + + exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype) + exp = exp.astype(dtype) tm.assert_frame_equal(df, exp) def test_series_partial_set(self): diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 16c4e9456aa05..4750f9b0c40a3 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -474,11 +474,12 @@ def test_concat_will_upcast(dt, pdt): assert x.values.dtype == "float64" -def test_concat_empty_and_non_empty_frame_regression(): +@pytest.mark.parametrize("dtype", ["int64", "Int64"]) +def test_concat_empty_and_non_empty_frame_regression(dtype): # GH 18178 regression test - df1 = DataFrame({"foo": [1]}) + df1 = DataFrame({"foo": [1]}).astype(dtype) df2 = DataFrame({"foo": []}) - expected = DataFrame({"foo": [1.0]}) + expected = df1 result = pd.concat([df1, df2]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index a97e9265b4f99..dea04e98088e8 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -202,12 +202,15 @@ def test_concat_empty_series_dtypes_sparse(self): expected = pd.SparseDtype("object") assert result.dtype == expected - def test_concat_empty_df_object_dtype(self): + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_concat_empty_df_object_dtype(self, dtype): # GH 9149 df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) + df_1["Row"] = df_1["Row"].astype(dtype) df_2 = DataFrame(columns=df_1.columns) result = pd.concat([df_1, df_2], axis=0) - expected = df_1.astype(object) + expected = df_1.copy() + expected["EmptyCol"] = expected["EmptyCol"].astype(object) # TODO: why? tm.assert_frame_equal(result, expected) def test_concat_empty_dataframe_dtypes(self): From 6afee149da8b1a57ad6e634accbe029b3528f4e2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 19:23:15 -0800 Subject: [PATCH 2/5] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5197fd2b23dab..4aeb1d0c49cbe 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -289,7 +289,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - +- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) - - From 073dfd2d658073c72a710ac3807b0ba407dd1d67 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 31 Dec 2020 15:23:29 -0800 Subject: [PATCH 3/5] fix dtype=None case --- pandas/tests/indexing/test_partial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 39bc2476a2cf5..d8dd08ea13341 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -183,7 +183,8 @@ def test_loc_setitem_expanding_empty(self, dtype): df.loc[3] = value exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype) - exp = exp.astype(dtype) + if dtype is not None: + exp = exp.astype(dtype) tm.assert_frame_equal(df, exp) def test_series_partial_set(self): From b18ce62ea67f5d489e4517785e9395fe5adb75a5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 31 Dec 2020 18:43:03 -0800 Subject: [PATCH 4/5] typo fixup --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a97223cd931be..cd656d1052d82 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -292,7 +292,7 @@ Reshaping ^^^^^^^^^ - Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) - -) + Sparse ^^^^^^ From 9be5dfbea92c435623cf0fe68590f089fc0efa45 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 1 Jan 2021 13:28:31 -0800 Subject: [PATCH 5/5] typo fixup --- doc/source/whatsnew/v1.3.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 3713b53e62c47..cae04af1ac2c4 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -293,7 +293,8 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) +- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) +- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) -