diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 5352ca53e1b54..31b6935e9b2ba 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -455,3 +455,57 @@ def get_blkno_placements(blknos, group: bool = True):
 
     for blkno, indexer in get_blkno_indexers(blknos, group):
         yield blkno, BlockPlacement(indexer)
+
+
+@cython.freelist(64)
+cdef class Block:
+    """
+    Defining __cinit__ in a cython class significantly improves performance.
+    """
+    cdef:
+        public BlockPlacement _mgr_locs
+        readonly int ndim
+        public object values
+
+    def __cinit__(self, values, placement: BlockPlacement, ndim: int):
+        """
+        Parameters
+        ----------
+        values : np.ndarray or ExtensionArray
+            We assume maybe_coerce_values has already been called.
+        placement : BlockPlacement
+        ndim : int
+            1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
+        """
+        self._mgr_locs = placement
+        self.ndim = ndim
+        self.values = values
+
+    cpdef __reduce__(self):
+        # Pickle as a call to new_block(values, indexer, ndim=ndim).
+        # We have to do some gymnastics b/c "ndim" is keyword-only
+        from functools import partial
+
+        from pandas.core.internals.blocks import new_block
+
+        args = (self.values, self.mgr_locs.indexer)
+        func = partial(new_block, ndim=self.ndim)
+        return func, args
+
+    cpdef __setstate__(self, state):
+        # state is (placement indexer, values[, ndim]); two-element states
+        # come from older pickles that did not store ndim.
+        from pandas.core.construction import extract_array
+
+        self.mgr_locs = BlockPlacement(state[0])
+        self.values = extract_array(state[1], extract_numpy=True)
+        if len(state) > 2:
+            # we stored ndim
+            self.ndim = state[2]
+        else:
+            # older pickle
+            from pandas.core.internals.api import maybe_infer_ndim
+
+            # ndim is a required positional argument; None requests inference
+            ndim = maybe_infer_ndim(self.values, self.mgr_locs, None)
+            self.ndim = ndim
diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py
index aab8273b1e213..d6b76510c68ab 100644
--- a/pandas/core/internals/api.py
+++ b/pandas/core/internals/api.py
@@ -59,13 +59,13 @@ def make_block(
     if not isinstance(placement, BlockPlacement):
         placement = BlockPlacement(placement)
 
-    ndim = _maybe_infer_ndim(values, placement, ndim)
+    ndim = maybe_infer_ndim(values, placement, ndim)
     check_ndim(values, placement, ndim)
     values = maybe_coerce_values(values)
     return klass(values, ndim=ndim, placement=placement)
 
 
-def _maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int:
+def maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int:
     """
     If `ndim` is not provided, infer it from placment and values.
     """
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index e4c89f2e63822..4d1006e5ab65b 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -146,7 +146,7 @@ def newfunc(self, *args, **kwargs) -> List[Block]:
     return cast(F, newfunc)
 
 
-class Block(PandasObject):
+class Block(libinternals.Block, PandasObject):
     """
     Canonical n-dimensional unit of homogeneous dtype contained in a pandas
     data structure
@@ -156,7 +156,7 @@ class Block(PandasObject):
 
     values: Union[np.ndarray, ExtensionArray]
 
-    __slots__ = ["_mgr_locs", "values", "ndim"]
+    __slots__ = ()
     is_numeric = False
     is_bool = False
     is_object = False
@@ -164,35 +164,6 @@ class Block(PandasObject):
     _can_consolidate = True
     _validate_ndim = True
 
-    @classmethod
-    def _simple_new(
-        cls, values: ArrayLike, placement: BlockPlacement, ndim: int
-    ) -> Block:
-        """
-        Fastpath constructor, does *no* validation
-        """
-        obj = object.__new__(cls)
-        obj.ndim = ndim
-        obj.values = values
-        obj._mgr_locs = placement
-        return obj
-
-    def __init__(self, values, placement: BlockPlacement, ndim: int):
-        """
-        Parameters
-        ----------
-        values : np.ndarray or ExtensionArray
-            We assume maybe_coerce_values has already been called.
-        placement : BlockPlacement (or castable)
-        ndim : int
-            1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
-        """
-        assert isinstance(ndim, int)
-        assert isinstance(placement, BlockPlacement)
-        self.ndim = ndim
-        self._mgr_locs = placement
-        self.values = values
-
     @final
     @property
     def _consolidate_key(self):
@@ -277,7 +248,6 @@ def mgr_locs(self) -> BlockPlacement:
 
     @mgr_locs.setter
     def mgr_locs(self, new_mgr_locs: BlockPlacement):
-        assert isinstance(new_mgr_locs, BlockPlacement)
         self._mgr_locs = new_mgr_locs
 
     @final
@@ -322,16 +292,6 @@ def __repr__(self) -> str:
     def __len__(self) -> int:
         return len(self.values)
 
-    @final
-    def __getstate__(self):
-        return self.mgr_locs.indexer, self.values
-
-    @final
-    def __setstate__(self, state):
-        self.mgr_locs = libinternals.BlockPlacement(state[0])
-        self.values = extract_array(state[1], extract_numpy=True)
-        self.ndim = self.values.ndim
-
     def _slice(self, slicer):
         """ return a slice of my values """
 
@@ -352,7 +312,7 @@ def getitem_block(self, slicer) -> Block:
         if new_values.ndim != self.values.ndim:
             raise ValueError("Only same dim slicing is allowed")
 
-        return type(self)._simple_new(new_values, new_mgr_locs, self.ndim)
+        return type(self)(new_values, new_mgr_locs, self.ndim)
 
     @final
     def getitem_block_index(self, slicer: slice) -> Block:
@@ -364,7 +324,7 @@ def getitem_block_index(self, slicer: slice) -> Block:
         # error: Invalid index type "Tuple[ellipsis, slice]" for
         # "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]"
         new_values = self.values[..., slicer]  # type: ignore[index]
-        return type(self)._simple_new(new_values, self._mgr_locs, ndim=self.ndim)
+        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
 
     @final
     def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
@@ -378,7 +338,7 @@ def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
         if new_values.ndim != self.values.ndim:
             raise ValueError("Only same dim slicing is allowed")
 
-        return type(self)._simple_new(new_values, new_mgr_locs, self.ndim)
+        return type(self)(new_values, new_mgr_locs, self.ndim)
 
     @property
     def shape(self) -> Shape:
@@ -1911,7 +1871,7 @@ def set_inplace(self, locs, values):
         self.values[locs] = values
 
 
-class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
+class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin):
     """ implement a datetime64 block with a tz attribute """
 
     values: DatetimeArray