Skip to content

BUG: inconsistent concat casting EA vs non-EA #38843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits on Jan 1, 2021
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -293,10 +293,11 @@ Groupby/resample/rolling

Reshaping
^^^^^^^^^

- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`)
-


Sparse
^^^^^^

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def is_nonempty(x) -> bool:
# marginal given that it would still require shape & dtype calculation and
# np.concatenate which has them both implemented is compiled.
non_empties = [x for x in to_concat if is_nonempty(x)]
if non_empties and axis == 0:
if non_empties:
to_concat = non_empties

typs = _get_dtype_kinds(to_concat)
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/indexing/test_partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,21 @@ def test_partial_setting_mixed_dtype(self):
with pytest.raises(ValueError, match=msg):
df.loc[0] = [1, 2, 3]

# TODO: #15657, these are left as object and not coerced
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
def test_loc_setitem_expanding_empty(self, dtype):
df = DataFrame(columns=["A", "B"])
df.loc[3] = [6, 7]

exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
value = [6, 7]
if dtype == "int64":
value = np.array(value, dtype=dtype)
elif dtype == "Int64":
value = pd.array(value, dtype=dtype)

df.loc[3] = value

exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype)
if dtype is not None:
exp = exp.astype(dtype)
tm.assert_frame_equal(df, exp)

def test_series_partial_set(self):
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,11 +474,12 @@ def test_concat_will_upcast(dt, pdt):
assert x.values.dtype == "float64"


def test_concat_empty_and_non_empty_frame_regression():
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
def test_concat_empty_and_non_empty_frame_regression(dtype):
# GH 18178 regression test
df1 = DataFrame({"foo": [1]})
df1 = DataFrame({"foo": [1]}).astype(dtype)
df2 = DataFrame({"foo": []})
expected = DataFrame({"foo": [1.0]})
expected = df1
result = pd.concat([df1, df2])
tm.assert_frame_equal(result, expected)

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/reshape/concat/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,15 @@ def test_concat_empty_series_dtypes_sparse(self):
expected = pd.SparseDtype("object")
assert result.dtype == expected

def test_concat_empty_df_object_dtype(self):
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
def test_concat_empty_df_object_dtype(self, dtype):
# GH 9149
df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
df_1["Row"] = df_1["Row"].astype(dtype)
df_2 = DataFrame(columns=df_1.columns)
result = pd.concat([df_1, df_2], axis=0)
expected = df_1.astype(object)
expected = df_1.copy()
expected["EmptyCol"] = expected["EmptyCol"].astype(object)  # TODO: why is the all-NaN column cast to object here?
tm.assert_frame_equal(result, expected)

def test_concat_empty_dataframe_dtypes(self):
Expand Down