Skip to content

Commit 6509028

Browse files
BUG: DataFrame.append with empty DataFrame and Series with tz-aware datetime value allocated object column (#35038)
1 parent d396111 commit 6509028

File tree

4 files changed

+22
-14
lines changed

4 files changed

+22
-14
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -922,6 +922,7 @@ Datetimelike
922922
resolution which converted to object dtype instead of coercing to ``datetime64[ns]``
923923
dtype when within the timestamp bounds (:issue:`34843`).
924924
- The ``freq`` keyword in :class:`Period`, :func:`date_range`, :func:`period_range`, :func:`pd.tseries.frequencies.to_offset` no longer allows tuples, pass as string instead (:issue:`34703`)
925+
- Bug in :meth:`DataFrame.append` where appending a :class:`Series` containing a scalar tz-aware :class:`Timestamp` to an empty :class:`DataFrame` resulted in an object column instead of ``datetime64[ns, tz]`` dtype (:issue:`35038`)
925926
- ``OutOfBoundsDatetime`` issues an improved error message when timestamp is out of implementation bounds. (:issue:`32967`)
926927

927928
Timedelta

pandas/core/dtypes/concat.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,11 @@ def is_nonempty(x) -> bool:
152152
target_dtype = find_common_type([x.dtype for x in to_concat])
153153
to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
154154

155-
if isinstance(to_concat[0], ExtensionArray):
155+
if isinstance(to_concat[0], ExtensionArray) and axis == 0:
156156
cls = type(to_concat[0])
157157
return cls._concat_same_type(to_concat)
158158
else:
159-
return np.concatenate(to_concat)
159+
return np.concatenate(to_concat, axis=axis)
160160

161161
elif _contains_datetime or "timedelta" in typs:
162162
return concat_datetime(to_concat, axis=axis, typs=typs)

pandas/core/internals/concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def _concatenate_join_units(join_units, concat_axis, copy):
333333
# concatting with at least one EA means we are concatting a single column
334334
# the non-EA values are 2D arrays with shape (1, n)
335335
to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
336-
concat_values = concat_compat(to_concat, axis=concat_axis)
336+
concat_values = concat_compat(to_concat, axis=0)
337337
if not isinstance(concat_values, ExtensionArray):
338338
# if the result of concat is not an EA but an ndarray, reshape to
339339
# 2D to put it in a non-EA Block

pandas/tests/reshape/test_concat.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -1087,20 +1087,27 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
10871087
date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
10881088
s = Series({"date": date, "a": 1.0, "b": 2.0})
10891089
df = DataFrame(columns=["c", "d"])
1090-
result = df.append(s, ignore_index=True)
1091-
# n.b. it's not clear to me that expected is correct here.
1092-
# It's possible that the `date` column should have
1093-
# datetime64[ns, tz] dtype for both result and expected.
1094-
# that would be more consistent with new columns having
1095-
# their own dtype (float for a and b, datetime64ns, tz for date).
1090+
result_a = df.append(s, ignore_index=True)
10961091
expected = DataFrame(
1097-
[[np.nan, np.nan, 1.0, 2.0, date]],
1098-
columns=["c", "d", "a", "b", "date"],
1099-
dtype=object,
1092+
[[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
11001093
)
11011094
# These columns get cast to object after append
1102-
expected["a"] = expected["a"].astype(float)
1103-
expected["b"] = expected["b"].astype(float)
1095+
expected["c"] = expected["c"].astype(object)
1096+
expected["d"] = expected["d"].astype(object)
1097+
tm.assert_frame_equal(result_a, expected)
1098+
1099+
expected = DataFrame(
1100+
[[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
1101+
)
1102+
expected["c"] = expected["c"].astype(object)
1103+
expected["d"] = expected["d"].astype(object)
1104+
1105+
result_b = result_a.append(s, ignore_index=True)
1106+
tm.assert_frame_equal(result_b, expected)
1107+
1108+
# column order is different
1109+
expected = expected[["c", "d", "date", "a", "b"]]
1110+
result = df.append([s, s], ignore_index=True)
11041111
tm.assert_frame_equal(result, expected)
11051112

11061113

0 commit comments

Comments
 (0)