diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e28a1a2326d17..1b0a045dca180 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1904,6 +1904,23 @@ def std( # Constructor Helpers +def sequence_to_datetimes( + data, allow_object: bool = False +) -> Union[np.ndarray, DatetimeArray]: + """ + Parse/convert the passed data to either DatetimeArray or np.ndarray[object]. + """ + result, tz, freq = sequence_to_dt64ns( + data, allow_object=allow_object, allow_mixed=True + ) + if result.dtype == object: + return result + + dtype = tz_to_dtype(tz) + dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype) + return dta + + def sequence_to_dt64ns( data, dtype=None, @@ -1912,6 +1929,9 @@ def sequence_to_dt64ns( dayfirst=False, yearfirst=False, ambiguous="raise", + *, + allow_object: bool = False, + allow_mixed: bool = False, ): """ Parameters @@ -1924,6 +1944,11 @@ def sequence_to_dt64ns( yearfirst : bool, default False ambiguous : str, bool, or arraylike, default 'raise' See pandas._libs.tslibs.tzconversion.tz_localize_to_utc. + allow_object : bool, default False + Whether to return an object-dtype ndarray instead of raising if the + data contains more than one timezone. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. Returns ------- @@ -1987,7 +2012,11 @@ def sequence_to_dt64ns( # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times data, inferred_tz = objects_to_datetime64ns( - data, dayfirst=dayfirst, yearfirst=yearfirst + data, + dayfirst=dayfirst, + yearfirst=yearfirst, + allow_object=allow_object, + allow_mixed=allow_mixed, ) if tz and inferred_tz: # two timezones: convert to intended from base UTC repr @@ -1995,6 +2024,9 @@ def sequence_to_dt64ns( data = data.view(DT64NS_DTYPE) elif inferred_tz: tz = inferred_tz + elif allow_object and data.dtype == object: + # We encountered mixed-timezones. + return data, None, None data_dtype = data.dtype @@ -2053,6 +2085,7 @@ def objects_to_datetime64ns( errors="raise", require_iso8601=False, allow_object=False, + allow_mixed: bool = False, ): """ Convert data to array of timestamps. @@ -2069,6 +2102,8 @@ def objects_to_datetime64ns( allow_object : bool Whether to return an object-dtype ndarray instead of raising if the data contains more than one timezone. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. Returns ------- @@ -2097,6 +2132,7 @@ def objects_to_datetime64ns( dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601, + allow_mixed=allow_mixed, ) result = result.reshape(data.shape, order=order) except ValueError as err: @@ -2133,7 +2169,7 @@ def objects_to_datetime64ns( raise TypeError(result) -def maybe_convert_dtype(data, copy): +def maybe_convert_dtype(data, copy: bool): """ Convert data based on dtype conventions, issuing deprecation warnings or errors where appropriate. diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d456f9c56e309..f5ec6616659ba 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -29,10 +29,7 @@ import numpy as np -from pandas._libs import ( - lib, - tslib, -) +from pandas._libs import lib from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, @@ -1605,8 +1602,8 @@ def maybe_cast_to_datetime( try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ + from pandas.core.arrays.datetimes import sequence_to_datetimes from pandas.core.arrays.timedeltas import sequence_to_td64ns - from pandas.core.tools.datetimes import to_datetime if not is_list_like(value): raise TypeError("value must be listlike") @@ -1664,19 +1661,19 @@ def maybe_cast_to_datetime( try: if is_datetime64: - dti = to_datetime(value, errors="raise") + dta = sequence_to_datetimes(value, allow_object=False) # GH 25843: Remove tz information since the dtype # didn't specify one - if dti.tz is not None: - dti = dti.tz_localize(None) - value = dti._values + if dta.tz is not None: + dta = dta.tz_localize(None) + value = dta elif is_datetime64tz: # The string check can be removed once issue #13712 # is solved. String data that is passed with a # datetime64tz is assumed to be naive which should # be localized to the timezone. is_dt_string = is_string_dtype(value.dtype) - dta = to_datetime(value, errors="raise").array + dta = sequence_to_datetimes(value, allow_object=False) if dta.tz is not None: value = dta.astype(dtype, copy=False) elif is_dt_string: @@ -1691,24 +1688,10 @@ def maybe_cast_to_datetime( value, _ = sequence_to_td64ns(value) except OutOfBoundsDatetime: raise - except ValueError as err: + except ValueError: # TODO(GH#40048): only catch dateutil's ParserError # once we can reliably import it in all supported versions - if "mixed datetimes and integers in passed array" in str(err): - # We need to catch this in array_to_datetime, otherwise - # we end up going through numpy which will lose nanoseconds - # from Timestamps - try: - i8vals, tz = tslib.array_to_datetime( - value, allow_mixed=True - ) - except ValueError: - pass - else: - from pandas.core.arrays import DatetimeArray - - dta = DatetimeArray(i8vals).tz_localize(tz) - value = dta + pass # coerce datetimelike to object elif is_datetime64_dtype(