BUG: DataFrame.append with empty DataFrame and Series with tz-aware datetime value allocated object column (#35038)

simonjayhawkins · web-flow · commit 650902866086 · 2020-07-16T18:50:38.000-04:00
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -922,6 +922,7 @@ Datetimelike
   resolution which converted to object dtype instead of coercing to ``datetime64[ns]``
   dtype when within the timestamp bounds (:issue:`34843`).
 - The ``freq`` keyword in :class:`Period`, :func:`date_range`, :func:`period_range`, :func:`pd.tseries.frequencies.to_offset` no longer allows tuples, pass as string instead (:issue:`34703`)
+- Bug in :meth:`DataFrame.append` when appending a :class:`Series` containing a scalar tz-aware :class:`Timestamp` to an empty :class:`DataFrame` resulted in an object column instead of datetime64[ns, tz] dtype (:issue:`35038`)
 - ``OutOfBoundsDatetime`` issues an improved error message when timestamp is out of implementation bounds. (:issue:`32967`)
 
 Timedelta
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -152,11 +152,11 @@ def is_nonempty(x) -> bool:
             target_dtype = find_common_type([x.dtype for x in to_concat])
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
-        if isinstance(to_concat[0], ExtensionArray):
+        if isinstance(to_concat[0], ExtensionArray) and axis == 0:
             cls = type(to_concat[0])
             return cls._concat_same_type(to_concat)
         else:
-            return np.concatenate(to_concat)
+            return np.concatenate(to_concat, axis=axis)
 
     elif _contains_datetime or "timedelta" in typs:
         return concat_datetime(to_concat, axis=axis, typs=typs)
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
@@ -333,7 +333,7 @@ def _concatenate_join_units(join_units, concat_axis, copy):
         # concatting with at least one EA means we are concatting a single column
         # the non-EA values are 2D arrays with shape (1, n)
         to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
-        concat_values = concat_compat(to_concat, axis=concat_axis)
+        concat_values = concat_compat(to_concat, axis=0)
         if not isinstance(concat_values, ExtensionArray):
             # if the result of concat is not an EA but an ndarray, reshape to
             # 2D to put it a non-EA Block
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
@@ -1087,20 +1087,27 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
         date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
         s = Series({"date": date, "a": 1.0, "b": 2.0})
         df = DataFrame(columns=["c", "d"])
-        result = df.append(s, ignore_index=True)
-        # n.b. it's not clear to me that expected is correct here.
-        # It's possible that the `date` column should have
-        # datetime64[ns, tz] dtype for both result and expected.
-        # that would be more consistent with new columns having
-        # their own dtype (float for a and b, datetime64ns, tz for date).
+        result_a = df.append(s, ignore_index=True)
         expected = DataFrame(
-            [[np.nan, np.nan, 1.0, 2.0, date]],
-            columns=["c", "d", "a", "b", "date"],
-            dtype=object,
+            [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
         )
         # These columns get cast to object after append
-        expected["a"] = expected["a"].astype(float)
-        expected["b"] = expected["b"].astype(float)
+        expected["c"] = expected["c"].astype(object)
+        expected["d"] = expected["d"].astype(object)
+        tm.assert_frame_equal(result_a, expected)
+
+        expected = DataFrame(
+            [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
+        )
+        expected["c"] = expected["c"].astype(object)
+        expected["d"] = expected["d"].astype(object)
+
+        result_b = result_a.append(s, ignore_index=True)
+        tm.assert_frame_equal(result_b, expected)
+
+        # column order is different
+        expected = expected[["c", "d", "date", "a", "b"]]
+        result = df.append([s, s], ignore_index=True)
         tm.assert_frame_equal(result, expected)