From 1f883a0f2602291a2ab3d94353b1f5547b373ddc Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Sep 2021 16:25:05 -0700 Subject: [PATCH 1/2] fix incorrect assertion --- pandas/core/internals/concat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index e1a51e5783cb8..d8c685779a2bc 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import itertools from typing import ( TYPE_CHECKING, From 76d0ef52ab059f84b8c1ccad8ee425faab857923 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 17 Sep 2021 20:26:41 -0700 Subject: [PATCH 2/2] REF: remove JoinUnit.shape --- pandas/core/internals/concat.py | 63 ++++++++++----------------------- 1 file changed, 19 insertions(+), 44 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index d8c685779a2bc..f5fcf85331001 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -1,6 +1,5 @@ from __future__ import annotations -import copy import itertools from typing import ( TYPE_CHECKING, @@ -210,6 +209,8 @@ def concatenate_managers( for placement, join_units in concat_plan: unit = join_units[0] blk = unit.block + # Assertion disabled for performance + # assert len(join_units) == len(mgrs_indexers) if len(join_units) == 1: values = blk.values @@ -225,11 +226,11 @@ def concatenate_managers( # _is_uniform_join_units ensures a single dtype, so # we can use np.concatenate, which is more performant # than concat_compat - values = np.concatenate(vals, axis=blk.ndim - 1) + values = np.concatenate(vals, axis=1) else: # TODO(EA2D): special-casing not needed with 2D EAs values = concat_compat(vals, axis=1) - values = ensure_block_shape(values, blk.ndim) + values = ensure_block_shape(values, 2) values = ensure_wrapped_if_datetimelike(values) @@ -241,7 +242,7 @@ def concatenate_managers( if fastpath: b = blk.make_block_same_class(values, placement=placement) else: - b = new_block(values, placement=placement, ndim=len(axes)) + b = new_block(values, placement=placement, ndim=2) blocks.append(b) @@ -327,14 +328,10 @@ def _get_mgr_concatenation_plan(mgr: BlockManager): plan : list of (BlockPlacement, JoinUnit) tuples """ - # Calculate post-reindex shape , save for item axis which will be separate - # for each block anyway. - mgr_shape_list = list(mgr.shape) - mgr_shape = tuple(mgr_shape_list) if mgr.is_single_block: blk = mgr.blocks[0] - return [(blk.mgr_locs, JoinUnit(blk, mgr_shape))] + return [(blk.mgr_locs, JoinUnit(blk))] blknos = mgr.blknos blklocs = mgr.blklocs @@ -342,12 +339,9 @@ def _get_mgr_concatenation_plan(mgr: BlockManager): plan = [] for blkno, placements in libinternals.get_blkno_placements(blknos, group=False): - assert placements.is_slice_like - assert blkno != -1 - - shape_list = list(mgr_shape) - shape_list[0] = len(placements) - shape = tuple(shape_list) + # Assertions disabled for performance; these should always hold + # assert placements.is_slice_like + # assert blkno != -1 blk = mgr.blocks[blkno] ax0_blk_indexer = blklocs[placements.indexer] @@ -375,8 +369,7 @@ def _get_mgr_concatenation_plan(mgr: BlockManager): # Assertions disabled for performance # assert blk._mgr_locs.as_slice == placements.as_slice - # assert blk.shape[0] == shape[0] - unit = JoinUnit(blk, shape) + unit = JoinUnit(blk) plan.append((placements, unit)) @@ -384,10 +377,8 @@ def _get_mgr_concatenation_plan(mgr: BlockManager): class JoinUnit: - def __init__(self, block: Block, shape: Shape): - # Passing shape explicitly is required for cases when block is None. + def __init__(self, block: Block): self.block = block - self.shape = shape def __repr__(self) -> str: return f"{type(self).__name__}({repr(self.block)})" @@ -400,22 +391,11 @@ def is_na(self) -> bool: return False def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike: - values: ArrayLike - if self.is_na: - return make_na_array(empty_dtype, self.shape) + return make_na_array(empty_dtype, self.block.shape) else: - - if not self.block._can_consolidate: - # preserve these for validation in concat_compat - return self.block.values - - # No dtype upcasting is done here, it will be performed during - # concatenation itself. - values = self.block.values - - return values + return self.block.values def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike: @@ -553,6 +533,9 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: first = join_units[0].block if first.dtype.kind == "V": return False + elif len(join_units) == 1: + # only use this path when there is something to concatenate + return False return ( # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64 all(type(ju.block) is type(first) for ju in join_units) @@ -565,13 +548,8 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: or ju.block.dtype.kind in ["b", "i", "u"] for ju in join_units ) - and - # no blocks that would get missing values (can lead to type upcasts) - # unless we're an extension dtype. - all(not ju.is_na or ju.block.is_extension for ju in join_units) - and - # only use this path when there is something to concatenate - len(join_units) > 1 + # this also precludes any blocks with dtype.kind == "V", since + # we excluded that case for `first` above. ) @@ -593,10 +571,7 @@ def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit: extra_block = join_unit.block.getitem_block(slice(length, None)) join_unit.block = join_unit.block.getitem_block(slice(length)) - extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:] - join_unit.shape = (length,) + join_unit.shape[1:] - - return JoinUnit(block=extra_block, shape=extra_shape) + return JoinUnit(block=extra_block) def _combine_concat_plans(plans):