Skip to content

REF: back DatetimeBlock, TimedeltaBlock by DTA/TDA #40456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Mar 30, 2021
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@ def quantile_ea_compat(
assert result.shape == (1,), result.shape
result = type(orig)._from_factorized(result, orig)

elif orig.ndim == 2:
# i.e. DatetimeArray
result = type(orig)._from_factorized(result, orig)

else:
assert result.shape == (1, len(qs)), result.shape
result = type(orig)._from_factorized(result[0], orig)
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import (
TYPE_CHECKING,
Optional,
cast,
overload,
)

Expand All @@ -25,6 +26,7 @@
from pandas.core.construction import ensure_wrapped_if_datetimelike

if TYPE_CHECKING:
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.arrays.base import ExtensionArray


Expand Down Expand Up @@ -100,7 +102,12 @@ def take_nd(

if not isinstance(arr, np.ndarray):
# i.e. ExtensionArray,
# includes for EA to catch DatetimeArray, TimedeltaArray
if arr.ndim == 2:
# e.g. DatetimeArray, TimedeltaArray
arr = cast("NDArrayBackedExtensionArray", arr)
return arr.take(
indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
)
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

arr = np.asarray(arr)
Expand Down
22 changes: 21 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
cache_readonly,
doc,
)
from pandas.util._validators import validate_fillna_kwargs
from pandas.util._validators import (
validate_bool_kwarg,
validate_fillna_kwargs,
)

from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.dtypes.missing import array_equivalent
Expand All @@ -35,6 +38,7 @@
from pandas.core.arrays.base import ExtensionArray
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.sorting import nargminmax

NDArrayBackedExtensionArrayT = TypeVar(
"NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray"
Expand Down Expand Up @@ -185,6 +189,22 @@ def equals(self, other) -> bool:
def _values_for_argsort(self):
return self._ndarray

# Signature of "argmin" incompatible with supertype "ExtensionArray"
def argmin(self, axis: int = 0, skipna: bool = True):  # type:ignore[override]
    """
    Index of the minimum value along ``axis``.

    Overrides the ``ExtensionArray`` signature by adding the ``axis`` keyword,
    hence the type-ignore on the signature.
    """
    validate_bool_kwarg(skipna, "skipna")
    if not skipna:
        # NA handling without skipping is not implemented.
        if self.isna().any():
            raise NotImplementedError
    return nargminmax(self, "argmin", axis=axis)

# Signature of "argmax" incompatible with supertype "ExtensionArray"
def argmax(self, axis: int = 0, skipna: bool = True): # type:ignore[override]
# override base class by adding axis keyword
validate_bool_kwarg(skipna, "skipna")
if not skipna and self.isna().any():
raise NotImplementedError
return nargminmax(self, "argmax", axis=axis)

def copy(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT:
    """Return a new array of the same type backed by a copy of the data."""
    return self._from_backing_data(self._ndarray.copy())
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9517,6 +9517,9 @@ def func(values: np.ndarray):

def blk_func(values, axis=1):
    """
    Apply the reduction to a single block's values.

    NOTE(review): the surrounding scrape interleaved review-comment text
    inside this function; this is the reconstructed clean body. ``name``,
    ``skipna``, ``kwds`` and ``op`` are closed over from the enclosing scope.
    """
    if isinstance(values, ExtensionArray):
        if values.ndim == 2:
            # i.e. DatetimeArray, TimedeltaArray — 2D EAs reduce along axis=1
            return values._reduce(name, axis=1, skipna=skipna, **kwds)
        return values._reduce(name, skipna=skipna, **kwds)
    else:
        return op(values, axis=axis, skipna=skipna, **kwds)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
)

import pandas.core.algorithms as algorithms
from pandas.core.arrays import ExtensionArray
from pandas.core.base import SelectionMixin
import pandas.core.common as com
from pandas.core.frame import DataFrame
Expand Down Expand Up @@ -267,7 +268,9 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
group_keys = self._get_group_keys()
result_values = None

if data.ndim == 2 and np.any(data.dtypes.apply(is_extension_array_dtype)):
if data.ndim == 2 and any(
isinstance(x, ExtensionArray) for x in data._iter_column_arrays()
):
# calling splitter.fast_apply will raise TypeError via apply_frame_axis0
# if we pass EA instead of ndarray
# TODO: can we have a workaround for EAs backed by ndarray?
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,9 +494,12 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T
if isinstance(applied, list):
applied = applied[0]
arr = applied.values
if self.ndim == 2:
if isinstance(arr, np.ndarray):
arr = arr[0, :]
if self.ndim == 2 and arr.ndim == 2:
assert len(arr) == 1
# error: Invalid index type "Tuple[int, slice]" for
# "Union[ndarray, ExtensionArray]"; expected type
# "Union[int, slice, ndarray]"
arr = arr[0, :] # type: ignore[index]
result_arrays.append(arr)

return type(self)(result_arrays, self._axes)
Expand Down
82 changes: 39 additions & 43 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
writers,
)
from pandas._libs.internals import BlockPlacement
from pandas._libs.tslibs import conversion
from pandas._typing import (
ArrayLike,
Dtype,
Expand All @@ -45,7 +44,6 @@
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_upcast,
sanitize_to_nanoseconds,
soft_convert_objects,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -919,7 +917,11 @@ def setitem(self, indexer, value):
return self.coerce_to_target_dtype(value).setitem(indexer, value)

if self.dtype.kind in ["m", "M"]:
arr = self.array_values().T
arr = self.values
if self.ndim > 1:
# Dont transpose with ndim=1 bc we would fail to invalidate
# arr.freq
arr = arr.T
arr[indexer] = value
return self

Expand Down Expand Up @@ -1153,6 +1155,7 @@ def _interpolate_with_fill(
limit_area=limit_area,
)

values = maybe_coerce_values(values)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prob can have maybe_coerce_values do downcasting (maybe)

blocks = [self.make_block_same_class(values)]
return self._maybe_downcast(blocks, downcast)

Expand Down Expand Up @@ -1208,6 +1211,7 @@ def func(yvalues: np.ndarray) -> np.ndarray:

# interp each column independently
interp_values = np.apply_along_axis(func, axis, data)
interp_values = maybe_coerce_values(interp_values)

blocks = [self.make_block_same_class(interp_values)]
return self._maybe_downcast(blocks, downcast)
Expand Down Expand Up @@ -1790,15 +1794,23 @@ class NDArrayBackedExtensionBlock(HybridMixin, Block):
Block backed by an NDArrayBackedExtensionArray
"""

values: NDArrayBackedExtensionArray

@property
def is_view(self) -> bool:
    """Return True if this block's data is possibly a view on another array."""
    # A non-None ``base`` on the backing ndarray indicates the memory is
    # (or may be) shared with another object rather than owned here.
    return self.values._ndarray.base is not None

def internal_values(self):
# Override to return DatetimeArray and TimedeltaArray
return self.array_values()
return self.values

def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
"""
return object dtype as boxed values, such as Timestamps/Timedelta
"""
values = self.array_values()
values = self.values
if is_object_dtype(dtype):
# DTA/TDA constructor and astype can handle 2D
values = values.astype(object)
Expand All @@ -1808,7 +1820,7 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
def iget(self, key):
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
# TODO(EA2D): this can be removed if we ever have 2D EA
return self.array_values().reshape(self.shape)[key]
return self.values.reshape(self.shape)[key]

def putmask(self, mask, new) -> List[Block]:
mask = extract_bool_array(mask)
Expand All @@ -1817,14 +1829,13 @@ def putmask(self, mask, new) -> List[Block]:
return self.astype(object).putmask(mask, new)

# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)
arr = cast("NDArrayBackedExtensionArray", arr)
arr = self.values.reshape(self.shape)
arr.T.putmask(mask, new)
return [self]

def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)
arr = self.values.reshape(self.shape)

cond = extract_bool_array(cond)

Expand All @@ -1835,7 +1846,6 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:

# TODO(EA2D): reshape not needed with 2D EAs
res_values = res_values.reshape(self.values.shape)
res_values = maybe_coerce_values(res_values)
nb = self.make_block_same_class(res_values)
return [nb]

Expand All @@ -1860,17 +1870,15 @@ def diff(self, n: int, axis: int = 0) -> List[Block]:
by apply.
"""
# TODO(EA2D): reshape not necessary with 2D EAs
values = self.array_values().reshape(self.shape)
values = self.values.reshape(self.shape)

new_values = values - values.shift(n, axis=axis)
new_values = maybe_coerce_values(new_values)
return [self.make_block(new_values)]

def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
values = self.array_values().reshape(self.shape)
values = self.values.reshape(self.shape)
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
new_values = maybe_coerce_values(new_values)
return [self.make_block_same_class(new_values)]

def fillna(
Expand All @@ -1883,38 +1891,36 @@ def fillna(
# TODO: don't special-case td64
return self.astype(object).fillna(value, limit, inplace, downcast)

values = self.array_values()
values = self.values
values = values if inplace else values.copy()
new_values = values.fillna(value=value, limit=limit)
new_values = maybe_coerce_values(new_values)
return [self.make_block_same_class(values=new_values)]


class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

values: Union[DatetimeArray, TimedeltaArray]

is_numeric = False
_can_hold_na = True

def array_values(self):
return ensure_wrapped_if_datetimelike(self.values)
return self.values

def external_values(self):
    """Return the backing ndarray of the datetimelike values."""
    # NB: for dt64tz this is different from np.asarray(self.values),
    # since that returns an object-dtype ndarray of Timestamps.
    return self.values._ndarray

@property
def _holder(self):
return type(self.array_values())
return type(self.values)


class DatetimeBlock(DatetimeLikeBlockMixin):
__slots__ = ()

def set_inplace(self, locs, values):
    """
    See Block.set.__doc__
    """
    # Presumably coerces to datetime64[ns] before writing into the backing
    # array; copy=False to avoid a copy when already ns-resolution — confirm
    # against ``conversion.ensure_datetime64ns``.
    values = conversion.ensure_datetime64ns(values, copy=False)

    self.values[locs] = values


class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
""" implement a datetime64 block with a tz attribute """
Expand All @@ -1932,20 +1938,14 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
where = DatetimeBlock.where
putmask = DatetimeLikeBlockMixin.putmask
fillna = DatetimeLikeBlockMixin.fillna
external_values = DatetimeLikeBlockMixin.external_values

array_values = ExtensionBlock.array_values
# error: Incompatible types in assignment (expression has type
# "Callable[[NDArrayBackedExtensionBlock], bool]", base class "ExtensionBlock"
# defined the type as "bool") [assignment]
is_view = NDArrayBackedExtensionBlock.is_view # type: ignore[assignment]

@property
def is_view(self) -> bool:
""" return a boolean if I am possibly a view """
# check the ndarray values of the DatetimeIndex values
return self.values._data.base is not None

def external_values(self):
# NB: this is different from np.asarray(self.values), since that
# return an object-dtype ndarray of Timestamps.
# Avoid FutureWarning in .astype in casting from dt64tz to dt64
return self.values._data
array_values = ExtensionBlock.array_values


class TimeDeltaBlock(DatetimeLikeBlockMixin):
Expand Down Expand Up @@ -2079,15 +2079,11 @@ def maybe_coerce_values(values) -> ArrayLike:
values = extract_array(values, extract_numpy=True)

if isinstance(values, np.ndarray):
values = sanitize_to_nanoseconds(values)
values = ensure_wrapped_if_datetimelike(values)

if issubclass(values.dtype.type, str):
values = np.array(values, dtype=object)

elif isinstance(values.dtype, np.dtype):
# i.e. not datetime64tz, extract DTA/TDA -> ndarray
values = values._data

return values


Expand Down
11 changes: 9 additions & 2 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,10 +422,17 @@ def _concatenate_join_units(
concat_values = concat_values.copy()
else:
concat_values = concat_values.copy()
elif any(isinstance(t, ExtensionArray) for t in to_concat):
elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat):
# concatting with at least one EA means we are concatting a single column
# the non-EA values are 2D arrays with shape (1, n)
to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
# error: Invalid index type "Tuple[int, slice]" for
# "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]"
to_concat = [
t
if (isinstance(t, ExtensionArray) and t.ndim == 1)
else t[0, :] # type: ignore[index]
for t in to_concat
]
concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
concat_values = ensure_block_shape(concat_values, 2)

Expand Down
Loading