Skip to content

REF: move mixed-int handling from maybe_cast_to_datetime #40187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 38 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1904,6 +1904,23 @@ def std(
# Constructor Helpers


def sequence_to_datetimes(
    data, allow_object: bool = False
) -> Union[np.ndarray, DatetimeArray]:
    """
    Convert ``data`` to a DatetimeArray, or to an object-dtype ndarray.

    Thin wrapper around ``sequence_to_dt64ns`` that always passes
    ``allow_mixed=True`` and boxes the parsed values in a DatetimeArray.

    Parameters
    ----------
    data : list-like
        Passed through unchanged to ``sequence_to_dt64ns``.
    allow_object : bool, default False
        Forwarded to ``sequence_to_dt64ns``. Whether to return an
        object-dtype ndarray instead of raising if the data contains more
        than one timezone.

    Returns
    -------
    np.ndarray or DatetimeArray
        The object-dtype ndarray from ``sequence_to_dt64ns`` is returned
        as-is; any other result is wrapped in a DatetimeArray carrying the
        returned ``tz`` (via ``tz_to_dtype``) and ``freq``.
    """
    values, tz, freq = sequence_to_dt64ns(
        data, allow_object=allow_object, allow_mixed=True
    )

    if values.dtype != object:
        # Parsing succeeded: box the values with the dtype implied by the
        # returned timezone.
        return DatetimeArray._simple_new(values, freq=freq, dtype=tz_to_dtype(tz))

    # Object dtype cannot be held by a DatetimeArray, so hand the ndarray
    # back to the caller untouched.
    return values


def sequence_to_dt64ns(
data,
dtype=None,
Expand All @@ -1912,6 +1929,9 @@ def sequence_to_dt64ns(
dayfirst=False,
yearfirst=False,
ambiguous="raise",
*,
allow_object: bool = False,
allow_mixed: bool = False,
):
"""
Parameters
Expand All @@ -1924,6 +1944,11 @@ def sequence_to_dt64ns(
yearfirst : bool, default False
ambiguous : str, bool, or arraylike, default 'raise'
See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
allow_object : bool, default False
Whether to return an object-dtype ndarray instead of raising if the
data contains more than one timezone.
allow_mixed : bool, default False
Interpret integers as timestamps when datetime objects are also present.

Returns
-------
Expand Down Expand Up @@ -1987,14 +2012,21 @@ def sequence_to_dt64ns(
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
data, inferred_tz = objects_to_datetime64ns(
data, dayfirst=dayfirst, yearfirst=yearfirst
data,
dayfirst=dayfirst,
yearfirst=yearfirst,
allow_object=allow_object,
allow_mixed=allow_mixed,
)
if tz and inferred_tz:
# two timezones: convert to intended from base UTC repr
data = tzconversion.tz_convert_from_utc(data.view("i8"), tz)
data = data.view(DT64NS_DTYPE)
elif inferred_tz:
tz = inferred_tz
elif allow_object and data.dtype == object:
# We encountered mixed-timezones.
return data, None, None

data_dtype = data.dtype

Expand Down Expand Up @@ -2053,6 +2085,7 @@ def objects_to_datetime64ns(
errors="raise",
require_iso8601=False,
allow_object=False,
allow_mixed: bool = False,
):
"""
Convert data to array of timestamps.
Expand All @@ -2069,6 +2102,8 @@ def objects_to_datetime64ns(
allow_object : bool
Whether to return an object-dtype ndarray instead of raising if the
data contains more than one timezone.
allow_mixed : bool, default False
Interpret integers as timestamps when datetime objects are also present.

Returns
-------
Expand Down Expand Up @@ -2097,6 +2132,7 @@ def objects_to_datetime64ns(
dayfirst=dayfirst,
yearfirst=yearfirst,
require_iso8601=require_iso8601,
allow_mixed=allow_mixed,
)
result = result.reshape(data.shape, order=order)
except ValueError as err:
Expand Down Expand Up @@ -2133,7 +2169,7 @@ def objects_to_datetime64ns(
raise TypeError(result)


def maybe_convert_dtype(data, copy):
def maybe_convert_dtype(data, copy: bool):
"""
Convert data based on dtype conventions, issuing deprecation warnings
or errors where appropriate.
Expand Down
35 changes: 9 additions & 26 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@

import numpy as np

from pandas._libs import (
lib,
tslib,
)
from pandas._libs import lib
from pandas._libs.tslibs import (
NaT,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -1605,8 +1602,8 @@ def maybe_cast_to_datetime(
try to cast the array/value to a datetimelike dtype, converting float
nan to iNaT
"""
from pandas.core.arrays.datetimes import sequence_to_datetimes
from pandas.core.arrays.timedeltas import sequence_to_td64ns
from pandas.core.tools.datetimes import to_datetime

if not is_list_like(value):
raise TypeError("value must be listlike")
Expand Down Expand Up @@ -1664,19 +1661,19 @@ def maybe_cast_to_datetime(

try:
if is_datetime64:
dti = to_datetime(value, errors="raise")
dta = sequence_to_datetimes(value, allow_object=False)
# GH 25843: Remove tz information since the dtype
# didn't specify one
if dti.tz is not None:
dti = dti.tz_localize(None)
value = dti._values
if dta.tz is not None:
dta = dta.tz_localize(None)
value = dta
elif is_datetime64tz:
# The string check can be removed once issue #13712
# is solved. String data that is passed with a
# datetime64tz is assumed to be naive which should
# be localized to the timezone.
is_dt_string = is_string_dtype(value.dtype)
dta = to_datetime(value, errors="raise").array
dta = sequence_to_datetimes(value, allow_object=False)
if dta.tz is not None:
value = dta.astype(dtype, copy=False)
elif is_dt_string:
Expand All @@ -1691,24 +1688,10 @@ def maybe_cast_to_datetime(
value, _ = sequence_to_td64ns(value)
except OutOfBoundsDatetime:
raise
except ValueError as err:
except ValueError:
# TODO(GH#40048): only catch dateutil's ParserError
# once we can reliably import it in all supported versions
if "mixed datetimes and integers in passed array" in str(err):
# We need to catch this in array_to_datetime, otherwise
# we end up going through numpy which will lose nanoseconds
# from Timestamps
try:
i8vals, tz = tslib.array_to_datetime(
value, allow_mixed=True
)
except ValueError:
pass
else:
from pandas.core.arrays import DatetimeArray

dta = DatetimeArray(i8vals).tz_localize(tz)
value = dta
pass

# coerce datetimelike to object
elif is_datetime64_dtype(
Expand Down