From 2cd0f716b074bf3e4add185bb9ffc3345cdf1713 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Mar 2020 12:15:24 -0800 Subject: [PATCH 1/4] TYP: enforce annotation on SingleBlockManager.__init__ --- pandas/core/internals/managers.py | 68 +++++++++---------- pandas/core/series.py | 4 +- pandas/tests/extension/test_external_block.py | 5 +- 3 files changed, 39 insertions(+), 38 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c0e32066a8a70..f568b1dce2b9f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -8,7 +8,7 @@ import numpy as np from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib -from pandas._typing import DtypeObj, Label +from pandas._typing import ArrayLike, DtypeObj, Label from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -154,6 +154,7 @@ def make_empty(self, axes=None) -> "BlockManager": if self.ndim == 1: assert isinstance(self, SingleBlockManager) # for mypy blocks = np.array([], dtype=self.array_dtype) + blocks = make_block(blocks, placement=slice(0, 0), ndim=1) else: blocks = [] return type(self)(blocks, axes) @@ -426,7 +427,11 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager": if len(result_blocks) == 0: return self.make_empty(self.axes) - bm = type(self)(result_blocks, self.axes, do_integrity_check=False) + if self.ndim == 1: + assert len(result_blocks) == 1 + bm = type(self)(result_blocks[0], self.axes) # type: ignore + else: + bm = type(self)(result_blocks, self.axes, do_integrity_check=False) return bm def quantile( @@ -626,8 +631,12 @@ def comp(s, regex=False): rb = new_rb result_blocks.extend(rb) - bm = type(self)(result_blocks, self.axes) - bm._consolidate_inplace() + if self.ndim == 1: + assert len(result_blocks) == 1 + bm = type(self)(result_blocks[0], self.axes) + else: + bm = type(self)(result_blocks, self.axes) + bm._consolidate_inplace() return bm def is_consolidated(self) -> bool: @@ -715,6 +724,9 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager": axes = list(self.axes) axes[0] = self.items.take(indexer) + if self.ndim == 1: + assert len(new_blocks) == 1 + return type(self)(new_blocks[0], axes) # type: ignore return type(self)(new_blocks, axes, do_integrity_check=False) def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager": @@ -1263,6 +1275,9 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis + if self.ndim == 1: + assert len(new_blocks) == 1 + return type(self)(new_blocks[0], new_axes) return type(self)(new_blocks, new_axes) def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): @@ -1464,6 +1479,8 @@ def __init__( do_integrity_check: bool = False, fastpath: bool = False, ): + assert isinstance(block, Block), type(block) + if isinstance(axis, list): if len(axis) != 1: raise ValueError( @@ -1474,39 +1491,19 @@ def __init__( # passed from constructor, single block, single axis if fastpath: self.axes = [axis] - if isinstance(block, list): - - # empty block - if len(block) == 0: - block = [np.array([])] - elif len(block) != 1: - raise ValueError( - "Cannot create SingleBlockManager with more than 1 block" - ) - block = block[0] else: self.axes = [ensure_index(axis)] - # create the block here - if isinstance(block, list): - - # provide consolidation to the interleaved_dtype - if len(block) > 1: - dtype = _interleaved_dtype(block) - block = [b.astype(dtype) for b in block] - block = _consolidate(block) - - if len(block) != 1: - raise ValueError( - "Cannot create SingleBlockManager with more than 1 block" - ) - block = block[0] - - if not isinstance(block, Block): - block = make_block(block, placement=slice(0, len(axis)), ndim=1) - self.blocks = tuple([block]) + @classmethod + def from_array(cls, array: ArrayLike, index: Index) -> "SingleBlockManager": + """ + Constructor for if we have an array that is not yet a Block. + """ + block = make_block(array, placement=slice(0, len(index)), ndim=1) + return cls(block, index, fastpath=True) + def _post_setstate(self): pass @@ -1532,7 +1529,10 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "SingleBlockManager": if axis >= self.ndim: raise IndexError("Requested axis not found in manager") - return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True) + blk = self._block + array = blk._slice(slobj) + block = blk.make_block_same_class(array, placement=range(len(array))) + return type(self)(block, self.index[slobj], fastpath=True) @property def index(self) -> Index: @@ -1594,7 +1594,7 @@ def fast_xs(self, loc): """ raise NotImplementedError("Use series._values[loc] instead") - def concat(self, to_concat, new_axis) -> "SingleBlockManager": + def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager": """ Concatenate a list of SingleBlockManagers into a single SingleBlockManager. diff --git a/pandas/core/series.py b/pandas/core/series.py index db63e9205d48d..268d58a2a91a8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -206,7 +206,7 @@ def __init__( # data is an ndarray, index is defined if not isinstance(data, SingleBlockManager): - data = SingleBlockManager(data, index, fastpath=True) + data = SingleBlockManager.from_array(data, index) if copy: data = data.copy() if index is None: @@ -321,7 +321,7 @@ def __init__( else: data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True) - data = SingleBlockManager(data, index, fastpath=True) + data = SingleBlockManager.from_array(data, index) generic.NDFrame.__init__(self, data) self.name = name diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 6311070cfe2bb..8a8dac54cf96a 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas.core.internals import BlockManager +from pandas.core.internals import BlockManager, SingleBlockManager from pandas.core.internals.blocks import Block, NonConsolidatableMixIn @@ -36,7 +36,8 @@ def test_concat_series(): # GH17728 values = np.arange(3, dtype="int64") block = CustomBlock(values, placement=slice(0, 3)) - s = pd.Series(block, pd.RangeIndex(3), fastpath=True) + mgr = SingleBlockManager(block, pd.RangeIndex(3)) + s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True) res = pd.concat([s, s]) assert isinstance(res._data.blocks[0], CustomBlock) From 36696ec7038c2e2695641f631c5ddb0942f5d83d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Mar 2020 16:15:14 -0800 Subject: [PATCH 2/4] helper function --- pandas/core/internals/managers.py | 65 +++++++++++++++++-------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 20e74993093d8..5a499e047c26d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,7 +3,7 @@ import itertools import operator import re -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np @@ -58,6 +58,8 @@ # TODO: flexible with index=None and/or items=None +T = TypeVar("T", bound="BlockManager") + class BlockManager(PandasObject): """ @@ -149,6 +151,13 @@ def __init__( self._blknos = None self._blklocs = None + @classmethod + def _from_blocks(cls, blocks: List[Block], axes: List[Index]): + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + return cls(blocks, axes, do_integrity_check=False) + @property def blknos(self): """ @@ -176,7 +185,7 @@ def blklocs(self): return self._blklocs - def make_empty(self, axes=None) -> "BlockManager": + def make_empty(self: T, axes=None) -> T: """ return an empty BlockManager with the items axis of len 0 """ if axes is None: axes = [Index([])] + self.axes[1:] @@ -184,11 +193,11 @@ def make_empty(self, axes=None) -> "BlockManager": # preserve dtype if possible if self.ndim == 1: assert isinstance(self, SingleBlockManager) # for mypy - blocks = np.array([], dtype=self.array_dtype) - blocks = make_block(blocks, placement=slice(0, 0), ndim=1) + arr = np.array([], dtype=self.array_dtype) + blocks = [make_block(arr, placement=slice(0, 0), ndim=1)] else: blocks = [] - return type(self)(blocks, axes) + return type(self)._from_blocks(blocks, axes) def __nonzero__(self) -> bool: return True @@ -381,7 +390,7 @@ def reduce(self, func, *args, **kwargs): return res - def apply(self, f, filter=None, **kwargs) -> "BlockManager": + def apply(self: T, f, filter=None, **kwargs) -> T: """ Iterate over the blocks, collect and create a new BlockManager. @@ -459,12 +468,8 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager": if len(result_blocks) == 0: return self.make_empty(self.axes) - if self.ndim == 1: - assert len(result_blocks) == 1 - bm = type(self)(result_blocks[0], self.axes) # type: ignore - else: - bm = type(self)(result_blocks, self.axes, do_integrity_check=False) - return bm + + return type(self)._from_blocks(result_blocks, self.axes) def quantile( self, @@ -663,12 +668,8 @@ def comp(s, regex=False): rb = new_rb result_blocks.extend(rb) - if self.ndim == 1: - assert len(result_blocks) == 1 - bm = type(self)(result_blocks[0], self.axes) - else: - bm = type(self)(result_blocks, self.axes) - bm._consolidate_inplace() + bm = type(self)._from_blocks(result_blocks, self.axes) + bm._consolidate_inplace() return bm def is_consolidated(self) -> bool: @@ -756,10 +757,7 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager": axes = list(self.axes) axes[0] = self.items.take(indexer) - if self.ndim == 1: - assert len(new_blocks) == 1 - return type(self)(new_blocks[0], axes) # type: ignore - return type(self)(new_blocks, axes, do_integrity_check=False) + return type(self)._from_blocks(new_blocks, axes) def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager": @@ -786,7 +784,7 @@ def __contains__(self, item) -> bool: def nblocks(self) -> int: return len(self.blocks) - def copy(self, deep=True) -> "BlockManager": + def copy(self: T, deep=True) -> T: """ Make deep or shallow copy of BlockManager @@ -1256,14 +1254,14 @@ def reindex_axis( ) def reindex_indexer( - self, + self: T, new_axis, indexer, axis: int, fill_value=None, - allow_dups=False, + allow_dups: bool = False, copy: bool = True, - ): + ) -> T: """ Parameters ---------- @@ -1311,10 +1309,8 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis - if self.ndim == 1: - assert len(new_blocks) == 1 - return type(self)(new_blocks[0], new_axes) - return type(self)(new_blocks, new_axes) + + return type(self)._from_blocks(new_blocks, new_axes) def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): """ @@ -1532,6 +1528,15 @@ def __init__( self.blocks = tuple([block]) + @classmethod + def _from_blocks(cls, blocks: List[Block], axes: List[Index]): + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + assert len(blocks) == 1 + assert len(axes) == 1 + return cls(blocks[0], axes[0], do_integrity_check=False, fastpath=True) + @classmethod def from_array(cls, array: ArrayLike, index: Index) -> "SingleBlockManager": """ From 4eef7f2b4636bbd76748ed2ee02157ea6d50e860 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Mar 2020 10:35:34 -0800 Subject: [PATCH 3/4] Suggested annotation --- pandas/core/internals/managers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5a499e047c26d..d0d2cbed272a0 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1529,7 +1529,9 @@ def __init__( self.blocks = tuple([block]) @classmethod - def _from_blocks(cls, blocks: List[Block], axes: List[Index]): + def _from_blocks( + cls, blocks: List[Block], axes: List[Index] + ) -> "SingleBlockManager": """ Constructor for BlockManager and SingleBlockManager with same signature. """ From e35cf337bd3caf72fcf01c03481e5b22c8ec1d7c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Mar 2020 09:26:08 -0700 Subject: [PATCH 4/4] de-privatize --- pandas/core/internals/managers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 60cfa0573eefe..c59499e1bee5c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -152,7 +152,7 @@ def __init__( self._blklocs = None @classmethod - def _from_blocks(cls, blocks: List[Block], axes: List[Index]): + def from_blocks(cls, blocks: List[Block], axes: List[Index]): """ Constructor for BlockManager and SingleBlockManager with same signature. """ @@ -197,7 +197,7 @@ def make_empty(self: T, axes=None) -> T: blocks = [make_block(arr, placement=slice(0, 0), ndim=1)] else: blocks = [] - return type(self)._from_blocks(blocks, axes) + return type(self).from_blocks(blocks, axes) def __nonzero__(self) -> bool: return True @@ -469,7 +469,7 @@ def apply(self: T, f, filter=None, **kwargs) -> T: if len(result_blocks) == 0: return self.make_empty(self.axes) - return type(self)._from_blocks(result_blocks, self.axes) + return type(self).from_blocks(result_blocks, self.axes) def quantile( self, @@ -668,7 +668,7 @@ def comp(s, regex=False): rb = new_rb result_blocks.extend(rb) - bm = type(self)._from_blocks(result_blocks, self.axes) + bm = type(self).from_blocks(result_blocks, self.axes) bm._consolidate_inplace() return bm @@ -757,7 +757,7 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager": axes = list(self.axes) axes[0] = self.items.take(indexer) - return type(self)._from_blocks(new_blocks, axes) + return type(self).from_blocks(new_blocks, axes) def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager": @@ -1310,7 +1310,7 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis - return type(self)._from_blocks(new_blocks, new_axes) + return type(self).from_blocks(new_blocks, new_axes) def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): """ @@ -1529,7 +1529,7 @@ def __init__( self.blocks = tuple([block]) @classmethod - def _from_blocks( + def from_blocks( cls, blocks: List[Block], axes: List[Index] ) -> "SingleBlockManager": """