Skip to content

REF: separate out seen.nat_ cases in maybe_convert_objects #49541

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
182 changes: 81 additions & 101 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1284,13 +1284,12 @@ cdef class Seen:

@property
def is_bool(self):
return not (self.datetime_ or self.numeric_ or self.timedelta_
or self.nat_)

@property
def is_float_or_complex(self):
return not (self.bool_ or self.datetime_ or self.timedelta_
or self.nat_)
# i.e. not (anything but bool)
return not (
self.datetime_ or self.datetimetz_ or self.timedelta_ or self.nat_
or self.period_ or self.interval_
or self.numeric_ or self.nan_ or self.null_ or self.object_
)


cdef object _try_infer_map(object dtype):
Expand Down Expand Up @@ -2641,104 +2640,83 @@ def maybe_convert_objects(ndarray[object] objects,

seen.object_ = True

if seen.nat_:
if seen.object_:
result = objects
elif seen.bool_:
result = objects
elif seen.null_:
result = objects
elif not safe and seen.nan_:
result = objects
elif seen.numeric_:
result = objects
else:
if convert_datetime and convert_timedelta:
dtype = dtype_if_all_nat
if dtype is not None:
# otherwise we keep object dtype
result = _infer_all_nats(
dtype, objects.shape
)
else:
result = objects
elif convert_datetime:
result = datetimes
elif convert_timedelta:
result = timedeltas
else:
result = objects
return result
return result

if seen.bool_:
if seen.is_bool:
# is_bool property rules out everything else
return bools.view(np.bool_)
seen.object_ = True

if not seen.object_:
result = None
if not safe:
if seen.null_ or seen.nan_:
if seen.is_float_or_complex:
if seen.complex_:
result = complexes
elif seen.float_:
result = floats
elif seen.int_:
if convert_to_nullable_integer:
from pandas.core.arrays import IntegerArray
result = IntegerArray(ints, mask)
else:
result = floats
elif seen.nan_:
if seen.complex_:
result = complexes
elif seen.float_:
result = floats
elif seen.null_ or seen.nan_:
if seen.int_:
if convert_to_nullable_integer:
from pandas.core.arrays import IntegerArray
result = IntegerArray(ints, mask)
else:
result = floats
elif seen.nan_:
result = floats
else:
if not seen.bool_:
if seen.datetime_:
if not seen.numeric_ and not seen.timedelta_:
result = datetimes
elif seen.timedelta_:
if not seen.numeric_:
result = timedeltas
elif seen.nat_:
if not seen.numeric_:
if convert_datetime and convert_timedelta:
dtype = dtype_if_all_nat
if dtype is not None:
# otherwise we keep object dtype
result = _infer_all_nats(
dtype, datetimes, timedeltas
)

elif convert_datetime:
result = datetimes
elif convert_timedelta:
result = timedeltas
if seen.int_:
if seen.uint_:
result = uints
else:
if seen.complex_:
result = complexes
elif seen.float_:
result = floats
elif seen.int_:
if seen.uint_:
result = uints
else:
result = ints
elif seen.is_bool:
result = bools.view(np.bool_)
result = ints

else:
# don't cast int to float, etc.
if seen.null_:
if seen.is_float_or_complex:
if seen.complex_:
if not seen.int_:
result = complexes
elif seen.float_ or seen.nan_:
if not seen.int_:
result = floats
else:
if not seen.bool_:
if seen.datetime_:
if not seen.numeric_ and not seen.timedelta_:
result = datetimes
elif seen.timedelta_:
if not seen.numeric_:
result = timedeltas
elif seen.nat_:
if not seen.numeric_:
if convert_datetime and convert_timedelta:
dtype = dtype_if_all_nat
if dtype is not None:
# otherwise we keep object dtype
result = _infer_all_nats(
dtype, datetimes, timedeltas
)

elif convert_datetime:
result = datetimes
elif convert_timedelta:
result = timedeltas
if seen.int_:
if seen.null_ or seen.nan_ or seen.float_ or seen.complex_:
# we have seen something other than int, so we do not
# convert with safe=True.
pass
else:
if seen.uint_:
result = uints
else:
if seen.complex_:
if not seen.int_:
result = complexes
elif seen.float_ or seen.nan_:
if not seen.int_:
result = floats
elif seen.int_:
if seen.uint_:
result = uints
else:
result = ints
elif seen.is_bool and not seen.nan_:
result = bools.view(np.bool_)
result = ints

else:
if seen.complex_:
result = complexes
elif seen.float_ or seen.nan_:
result = floats

if result is uints or result is ints or result is floats or result is complexes:
# cast to the largest itemsize when all values are NumPy scalars
Expand All @@ -2751,22 +2729,24 @@ def maybe_convert_objects(ndarray[object] objects,
return objects


cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
cdef _infer_all_nats(dtype, cnp.npy_intp* shape):
"""
If we have all-NaT values, cast these to the given dtype.
"""
if cnp.PyArray_DescrCheck(dtype):
# i.e. isinstance(dtype, np.dtype):
if dtype == "M8[ns]":
result = datetimes
elif dtype == "m8[ns]":
result = timedeltas
if dtype == "M8[ns]" or dtype == "m8[ns]":
pass
else:
raise ValueError(dtype)

i8vals = cnp.PyArray_EMPTY(1, shape, cnp.NPY_INT64, 0)
i8vals.fill(NPY_NAT)
result = i8vals.view(dtype)
else:
# ExtensionDtype
cls = dtype.construct_array_type()
i8vals = cnp.PyArray_EMPTY(1, datetimes.shape, cnp.NPY_INT64, 0)
i8vals = cnp.PyArray_EMPTY(1, shape, cnp.NPY_INT64, 0)
i8vals.fill(NPY_NAT)
result = cls(i8vals, dtype=dtype)
return result
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,8 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]

if is_object_dtype(values.dtype):
values = cast(np.ndarray, values)
# Only place where we pass safe=True, only needed for
# test_format_missing
values = lib.maybe_convert_objects(values, safe=True)

result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
Expand Down