@@ -198,6 +198,8 @@ def concatenate_managers(
198
198
if isinstance (mgrs_indexers [0 ][0 ], ArrayManager ):
199
199
return _concatenate_array_managers (mgrs_indexers , axes , concat_axis , copy )
200
200
201
+ mgrs_indexers = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers )
202
+
201
203
concat_plans = [
202
204
_get_mgr_concatenation_plan (mgr , indexers ) for mgr , indexers in mgrs_indexers
203
205
]
@@ -245,6 +247,38 @@ def concatenate_managers(
245
247
return BlockManager (tuple (blocks ), axes )
246
248
247
249
250
+ def _maybe_reindex_columns_na_proxy (
251
+ axes : list [Index ], mgrs_indexers : list [tuple [BlockManager , dict [int , np .ndarray ]]]
252
+ ) -> list [tuple [BlockManager , dict [int , np .ndarray ]]]:
253
+ """
254
+ Reindex along columns so that all of the BlockManagers being concatenated
255
+ have matching columns.
256
+
257
+ Columns added in this reindexing have dtype=np.void, indicating they
258
+ should be ignored when choosing a column's final dtype.
259
+ """
260
+ new_mgrs_indexers = []
261
+ for mgr , indexers in mgrs_indexers :
262
+ # We only reindex for axis=0 (i.e. columns), as this can be done cheaply
263
+ if 0 in indexers :
264
+ new_mgr = mgr .reindex_indexer (
265
+ axes [0 ],
266
+ indexers [0 ],
267
+ axis = 0 ,
268
+ copy = False ,
269
+ only_slice = True ,
270
+ allow_dups = True ,
271
+ use_na_proxy = True ,
272
+ )
273
+ new_indexers = indexers .copy ()
274
+ del new_indexers [0 ]
275
+ new_mgrs_indexers .append ((new_mgr , new_indexers ))
276
+ else :
277
+ new_mgrs_indexers .append ((mgr , indexers ))
278
+
279
+ return new_mgrs_indexers
280
+
281
+
248
282
def _get_mgr_concatenation_plan (mgr : BlockManager , indexers : dict [int , np .ndarray ]):
249
283
"""
250
284
Construct concatenation plan for given block manager and indexers.
@@ -375,6 +409,8 @@ def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
375
409
return False
376
410
if self .block is None :
377
411
return True
412
+ if self .block .dtype .kind == "V" :
413
+ return True
378
414
379
415
if self .dtype == object :
380
416
values = self .block .values
@@ -401,6 +437,8 @@ def is_na(self) -> bool:
401
437
blk = self .block
402
438
if blk is None :
403
439
return True
440
+ if blk .dtype .kind == "V" :
441
+ return True
404
442
405
443
if not blk ._can_hold_na :
406
444
return False
@@ -426,7 +464,7 @@ def is_na(self) -> bool:
426
464
return all (isna_all (row ) for row in values )
427
465
428
466
def get_reindexed_values (self , empty_dtype : DtypeObj , upcasted_na ) -> ArrayLike :
429
- if upcasted_na is None :
467
+ if upcasted_na is None and self . block . dtype . kind != "V" :
430
468
# No upcasting is necessary
431
469
fill_value = self .block .fill_value
432
470
values = self .block .get_values ()
@@ -435,6 +473,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
435
473
436
474
if self ._is_valid_na_for (empty_dtype ):
437
475
# note: always holds when self.block is None
476
+ # or self.block.dtype.kind == "V"
438
477
blk_dtype = getattr (self .block , "dtype" , None )
439
478
440
479
if blk_dtype == np .dtype ("object" ):
@@ -512,7 +551,9 @@ def _concatenate_join_units(
512
551
513
552
empty_dtype = _get_empty_dtype (join_units )
514
553
515
- has_none_blocks = any (unit .block is None for unit in join_units )
554
+ has_none_blocks = any (
555
+ unit .block is None or unit .block .dtype .kind == "V" for unit in join_units
556
+ )
516
557
upcasted_na = _dtype_to_na_value (empty_dtype , has_none_blocks )
517
558
518
559
to_concat = [
@@ -597,13 +638,19 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
597
638
empty_dtype = join_units [0 ].block .dtype
598
639
return empty_dtype
599
640
600
- has_none_blocks = any (unit .block is None for unit in join_units )
641
+ has_none_blocks = any (
642
+ unit .block is None or unit .block .dtype .kind == "V" for unit in join_units
643
+ )
601
644
602
645
dtypes = [
603
646
unit .dtype for unit in join_units if unit .block is not None and not unit .is_na
604
647
]
605
648
if not len (dtypes ):
606
- dtypes = [unit .dtype for unit in join_units if unit .block is not None ]
649
+ dtypes = [
650
+ unit .dtype
651
+ for unit in join_units
652
+ if unit .block is not None and unit .block .dtype .kind != "V"
653
+ ]
607
654
608
655
dtype = find_common_type (dtypes )
609
656
if has_none_blocks :
@@ -619,7 +666,7 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
619
666
620
667
"""
621
668
first = join_units [0 ].block
622
- if first is None :
669
+ if first is None or first . dtype . kind == "V" :
623
670
return False
624
671
return (
625
672
# exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
0 commit comments