
Commit 7e6bf97

Merge remote-tracking branch 'upstream/master' into styler_format_index
2 parents afa6da3 + 7d27588

7 files changed (+135, -12 lines)

pandas/core/config_init.py (+1, -1)

@@ -896,7 +896,7 @@ def register_converter_cb(key):
         "latex.multicol_align",
         "r",
         styler_multicol_align,
-        validator=is_one_of_factory(["r", "c", "l"]),
+        validator=is_one_of_factory(["r", "c", "l", "naive-l", "naive-r"]),
     )

     cf.register_option(
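
Note: a minimal usage sketch of what the widened validator permits (assuming a pandas build that includes this change); the full option path ``styler.latex.multicol_align`` matches the default referenced in pandas/io/formats/style.py below.

    import pandas as pd

    # The two new "naive" values should now pass option validation.
    pd.set_option("styler.latex.multicol_align", "naive-l")
    print(pd.get_option("styler.latex.multicol_align"))  # naive-l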

pandas/core/frame.py (+12, -1)

@@ -469,7 +469,8 @@ class DataFrame(NDFrame, OpsMixin):
     ----------
     data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
         Dict can contain Series, arrays, constants, dataclass or list-like objects. If
-        data is a dict, column order follows insertion-order.
+        data is a dict, column order follows insertion-order. If a dict contains Series
+        which have an index defined, it is aligned by its index.

         .. versionchanged:: 0.25.0
            If data is a list of dicts, column order follows insertion-order.
@@ -524,6 +525,16 @@ class DataFrame(NDFrame, OpsMixin):
     col2    int8
     dtype: object

+    Constructing DataFrame from a dictionary including Series:
+
+    >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])}
+    >>> pd.DataFrame(data=d, index=[0, 1, 2, 3])
+       col1  col2
+    0     0   NaN
+    1     1   NaN
+    2     2   2.0
+    3     3   3.0
+
     Constructing DataFrame from numpy ndarray:

     >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),

pandas/core/internals/concat.py (+52, -5)

@@ -198,6 +198,8 @@ def concatenate_managers(
     if isinstance(mgrs_indexers[0][0], ArrayManager):
         return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

+    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)
+
     concat_plans = [
         _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
     ]
@@ -245,6 +247,38 @@ def concatenate_managers(
     return BlockManager(tuple(blocks), axes)


+def _maybe_reindex_columns_na_proxy(
+    axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]]
+) -> list[tuple[BlockManager, dict[int, np.ndarray]]]:
+    """
+    Reindex along columns so that all of the BlockManagers being concatenated
+    have matching columns.
+
+    Columns added in this reindexing have dtype=np.void, indicating they
+    should be ignored when choosing a column's final dtype.
+    """
+    new_mgrs_indexers = []
+    for mgr, indexers in mgrs_indexers:
+        # We only reindex for axis=0 (i.e. columns), as this can be done cheaply
+        if 0 in indexers:
+            new_mgr = mgr.reindex_indexer(
+                axes[0],
+                indexers[0],
+                axis=0,
+                copy=False,
+                only_slice=True,
+                allow_dups=True,
+                use_na_proxy=True,
+            )
+            new_indexers = indexers.copy()
+            del new_indexers[0]
+            new_mgrs_indexers.append((new_mgr, new_indexers))
+        else:
+            new_mgrs_indexers.append((mgr, indexers))
+
+    return new_mgrs_indexers
+
+
 def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]):
     """
     Construct concatenation plan for given block manager and indexers.
@@ -375,6 +409,8 @@ def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
             return False
         if self.block is None:
             return True
+        if self.block.dtype.kind == "V":
+            return True

         if self.dtype == object:
             values = self.block.values
@@ -401,6 +437,8 @@ def is_na(self) -> bool:
         blk = self.block
         if blk is None:
             return True
+        if blk.dtype.kind == "V":
+            return True

         if not blk._can_hold_na:
             return False
@@ -426,7 +464,7 @@ def is_na(self) -> bool:
         return all(isna_all(row) for row in values)

     def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
-        if upcasted_na is None:
+        if upcasted_na is None and self.block.dtype.kind != "V":
             # No upcasting is necessary
             fill_value = self.block.fill_value
             values = self.block.get_values()
@@ -435,6 +473,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:

         if self._is_valid_na_for(empty_dtype):
             # note: always holds when self.block is None
+            # or self.block.dtype.kind == "V"
             blk_dtype = getattr(self.block, "dtype", None)

             if blk_dtype == np.dtype("object"):
@@ -512,7 +551,9 @@ def _concatenate_join_units(

     empty_dtype = _get_empty_dtype(join_units)

-    has_none_blocks = any(unit.block is None for unit in join_units)
+    has_none_blocks = any(
+        unit.block is None or unit.block.dtype.kind == "V" for unit in join_units
+    )
     upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

     to_concat = [
@@ -597,13 +638,19 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
         empty_dtype = join_units[0].block.dtype
         return empty_dtype

-    has_none_blocks = any(unit.block is None for unit in join_units)
+    has_none_blocks = any(
+        unit.block is None or unit.block.dtype.kind == "V" for unit in join_units
+    )

     dtypes = [
         unit.dtype for unit in join_units if unit.block is not None and not unit.is_na
     ]
     if not len(dtypes):
-        dtypes = [unit.dtype for unit in join_units if unit.block is not None]
+        dtypes = [
+            unit.dtype
+            for unit in join_units
+            if unit.block is not None and unit.block.dtype.kind != "V"
+        ]

     dtype = find_common_type(dtypes)
     if has_none_blocks:
@@ -619,7 +666,7 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:

     """
     first = join_units[0].block
-    if first is None:
+    if first is None or first.dtype.kind == "V":
         return False
     return (
         # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
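
Note: a standalone sketch (not pandas internals) of the np.void "proxy" idea used above — a void-dtype array stands in for a column missing from one of the inputs, and ``dtype.kind == "V"`` lets the dtype-resolution step skip it.

    import numpy as np

    # Placeholder for an absent column vs. real values from a manager that has it.
    proxy = np.empty(3, dtype=np.void)
    real = np.array([1, 2, 3], dtype="int64")

    # Mirrors the filtering in _get_empty_dtype: void-kind blocks are ignored.
    dtypes = [arr.dtype for arr in (proxy, real) if arr.dtype.kind != "V"]
    print(dtypes)  # [dtype('int64')] -- the proxy does not force an upcast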

pandas/core/internals/managers.py (+28, -3)

@@ -66,6 +66,7 @@
 from pandas.core.internals.blocks import (
     Block,
     DatetimeTZBlock,
+    NumpyBlock,
     ensure_block_shape,
     extend_blocks,
     get_block_type,
@@ -613,6 +614,8 @@ def reindex_indexer(
         copy: bool = True,
         consolidate: bool = True,
         only_slice: bool = False,
+        *,
+        use_na_proxy: bool = False,
     ) -> T:
         """
         Parameters
@@ -627,6 +630,8 @@ def reindex_indexer(
             Whether to consolidate inplace before reindexing.
         only_slice : bool, default False
             Whether to take views, not copies, along columns.
+        use_na_proxy : bool, default False
+            Whether to use a np.void ndarray for newly introduced columns.

         pandas-indexer with -1's only.
         """
@@ -651,7 +656,10 @@ def reindex_indexer(

         if axis == 0:
             new_blocks = self._slice_take_blocks_ax0(
-                indexer, fill_value=fill_value, only_slice=only_slice
+                indexer,
+                fill_value=fill_value,
+                only_slice=only_slice,
+                use_na_proxy=use_na_proxy,
             )
         else:
             new_blocks = [
@@ -675,6 +683,8 @@ def _slice_take_blocks_ax0(
         slice_or_indexer: slice | np.ndarray,
         fill_value=lib.no_default,
         only_slice: bool = False,
+        *,
+        use_na_proxy: bool = False,
     ) -> list[Block]:
         """
         Slice/take blocks along axis=0.
@@ -688,6 +698,8 @@ def _slice_take_blocks_ax0(
         only_slice : bool, default False
             If True, we always return views on existing arrays, never copies.
             This is used when called from ops.blockwise.operate_blockwise.
+        use_na_proxy : bool, default False
+            Whether to use a np.void ndarray for newly introduced columns.

         Returns
         -------
@@ -756,7 +768,11 @@ def _slice_take_blocks_ax0(
                 # If we've got here, fill_value was not lib.no_default

                 blocks.append(
-                    self._make_na_block(placement=mgr_locs, fill_value=fill_value)
+                    self._make_na_block(
+                        placement=mgr_locs,
+                        fill_value=fill_value,
+                        use_na_proxy=use_na_proxy,
+                    )
                 )
             else:
                 blk = self.blocks[blkno]
@@ -798,7 +814,16 @@ def _slice_take_blocks_ax0(

         return blocks

-    def _make_na_block(self, placement: BlockPlacement, fill_value=None) -> Block:
+    def _make_na_block(
+        self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False
+    ) -> Block:
+
+        if use_na_proxy:
+            assert fill_value is None
+            shape = (len(placement), self.shape[1])
+            vals = np.empty(shape, dtype=np.void)
+            nb = NumpyBlock(vals, placement, ndim=2)
+            return nb

         if fill_value is None:
             fill_value = np.nan
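
Note: a standalone sketch of the ``use_na_proxy`` branch of ``_make_na_block`` above (illustrative names, not the pandas Block API) — a void-dtype placeholder is allocated for the newly introduced columns instead of materialising a NaN-filled float block.

    import numpy as np

    def make_na_proxy_values(n_columns: int, n_rows: int) -> np.ndarray:
        # Zero-itemsize void values: carry shape/placement info but no data.
        return np.empty((n_columns, n_rows), dtype=np.void)

    vals = make_na_proxy_values(2, 4)
    print(vals.shape, vals.dtype.kind)  # (2, 4) V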

pandas/io/formats/style.py (+5, -2)

@@ -527,10 +527,13 @@ def to_latex(
             without multirow.

             .. versionchanged:: 1.4.0
-        multicol_align : {"r", "c", "l"}, optional
+        multicol_align : {"r", "c", "l", "naive-l", "naive-r"}, optional
             If sparsifying hierarchical MultiIndex columns whether to align text at
             the left, centrally, or at the right. If not given defaults to
-            ``pandas.options.styler.latex.multicol_align``
+            ``pandas.options.styler.latex.multicol_align``. If a naive option is
+            given renders without multicol.
+
+            .. versionchanged:: 1.4.0
         siunitx : bool, default False
             Set to ``True`` to structure LaTeX compatible with the {siunitx} package.
         environment : str, optional
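
Note: a hedged usage sketch of the documented parameter — rendering a spanned MultiIndex column header without ``\multicolumn`` by passing one of the new "naive" alignments (output not shown; see the test below for the expected LaTeX).

    import pandas as pd

    # Styler requires jinja2 to be installed.
    cols = pd.MultiIndex.from_tuples([("A", "a"), ("A", "b")])
    df = pd.DataFrame([[0, 1]], columns=cols)
    print(df.style.to_latex(multicol_align="naive-l"))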

pandas/io/formats/style_render.py (+8)

@@ -1622,6 +1622,14 @@ def _parse_latex_header_span(
     if 'colspan="' in attrs:
         colspan = attrs[attrs.find('colspan="') + 9 :]  # len('colspan="') = 9
         colspan = int(colspan[: colspan.find('"')])
+        if "naive-l" == multicol_align:
+            out = f"{{{display_val}}}" if wrap else f"{display_val}"
+            blanks = " & {}" if wrap else " &"
+            return out + blanks * (colspan - 1)
+        elif "naive-r" == multicol_align:
+            out = f"{{{display_val}}}" if wrap else f"{display_val}"
+            blanks = "{} & " if wrap else "& "
+            return blanks * (colspan - 1) + out
         return f"\\multicolumn{{{colspan}}}{{{multicol_align}}}{{{display_val}}}"
     elif 'rowspan="' in attrs:
         if multirow_align == "naive":
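
Note: a standalone sketch of the naive-span padding added above (outside pandas, non-siunitx case) — a header spanning ``colspan`` columns becomes one labelled cell plus blank cells instead of a ``\multicolumn`` command.

    display_val, colspan = "A", 3

    naive_l = display_val + " &" * (colspan - 1)  # "A & &"  label left, blanks right
    naive_r = "& " * (colspan - 1) + display_val  # "& & A"  blanks left, label right
    print(naive_l, "|", naive_r)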

pandas/tests/io/formats/style/test_to_latex.py (+29)

@@ -302,6 +302,35 @@ def test_multiindex_row_and_col(df_ext):
     assert result == expected


+@pytest.mark.parametrize(
+    "multicol_align, siunitx, header",
+    [
+        ("naive-l", False, " & A & &"),
+        ("naive-r", False, " & & & A"),
+        ("naive-l", True, "{} & {A} & {} & {}"),
+        ("naive-r", True, "{} & {} & {} & {A}"),
+    ],
+)
+def test_multicol_naive(df, multicol_align, siunitx, header):
+    ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")])
+    df.columns = ridx
+    level1 = " & a & b & c" if not siunitx else "{} & {a} & {b} & {c}"
+    col_format = "lrrl" if not siunitx else "lSSl"
+    expected = dedent(
+        f"""\
+        \\begin{{tabular}}{{{col_format}}}
+        {header} \\\\
+        {level1} \\\\
+        0 & 0 & -0.61 & ab \\\\
+        1 & 1 & -1.22 & cd \\\\
+        \\end{{tabular}}
+        """
+    )
+    styler = df.style.format(precision=2)
+    result = styler.to_latex(multicol_align=multicol_align, siunitx=siunitx)
+    assert expected == result
+
+
 def test_multi_options(df_ext):
     cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
     ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
