Skip to content

Commit c56f00e

Browse files
authored
Merge branch 'main' into deprecation
2 parents 1df4167 + 0a23624 commit c56f00e

File tree

10 files changed

+103
-53
lines changed

10 files changed

+103
-53
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ Conversion
718718
Strings
719719
^^^^^^^
720720
- Bug in :meth:`Series.str` that did not raise a ``TypeError`` when iterated (:issue:`54173`)
721+
- Bug in ``repr`` for :class:`DataFrame` with string-dtype columns (:issue:`54797`)
721722

722723
Interval
723724
^^^^^^^^

meson.build

+1-6
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,15 @@ project(
66
license: 'BSD-3',
77
meson_version: '>=1.0.1',
88
default_options: [
9-
# TODO: investigate, does meson try to compile against debug Python
10-
# when buildtype = debug, this seems to be causing problems on CI
11-
# where provided Python is not compiled in debug mode
129
'buildtype=release',
1310
# TODO: Reactivate werror, some warnings on Windows
1411
#'werror=true',
1512
'c_std=c99'
1613
]
1714
)
1815

19-
py_mod = import('python')
2016
fs = import('fs')
21-
py = py_mod.find_installation('python')
22-
py_dep = py.dependency()
17+
py = import('python').find_installation()
2318
tempita = files('generate_pxi.py')
2419
versioneer = files('generate_version.py')
2520

pandas/_libs/window/meson.build

-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ py.extension_module(
33
['aggregations.pyx'],
44
cython_args: ['-X always_allow_keywords=true'],
55
include_directories: [inc_np, inc_pd],
6-
dependencies: [py_dep],
76
subdir: 'pandas/_libs/window',
87
override_options : ['cython_language=cpp'],
98
install: true
@@ -14,7 +13,6 @@ py.extension_module(
1413
['indexers.pyx'],
1514
cython_args: ['-X always_allow_keywords=true'],
1615
include_directories: [inc_np, inc_pd],
17-
dependencies: [py_dep],
1816
subdir: 'pandas/_libs/window',
1917
install: true
2018
)

pandas/core/generic.py

+55-5
Original file line numberDiff line numberDiff line change
@@ -7099,6 +7099,8 @@ def fillna(
70997099
71007100
See Also
71017101
--------
7102+
ffill : Fill values by propagating the last valid observation to next valid.
7103+
bfill : Fill values by using the next valid observation to fill the gap.
71027104
interpolate : Fill NaN values using interpolation.
71037105
reindex : Conform object to new index.
71047106
asfreq : Convert TimeSeries to specified frequency.
@@ -7358,7 +7360,10 @@ def ffill(
73587360
...
73597361

73607362
@final
7361-
@doc(klass=_shared_doc_kwargs["klass"])
7363+
@doc(
7364+
klass=_shared_doc_kwargs["klass"],
7365+
axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
7366+
)
73627367
def ffill(
73637368
self,
73647369
*,
@@ -7370,6 +7375,27 @@ def ffill(
73707375
"""
73717376
Fill NA/NaN values by propagating the last valid observation to next valid.
73727377
7378+
Parameters
7379+
----------
7380+
axis : {axes_single_arg}
7381+
Axis along which to fill missing values. For `Series`
7382+
this parameter is unused and defaults to 0.
7383+
inplace : bool, default False
7384+
If True, fill in-place. Note: this will modify any
7385+
other views on this object (e.g., a no-copy slice for a column in a
7386+
DataFrame).
7387+
limit : int, default None
7388+
If method is specified, this is the maximum number of consecutive
7389+
NaN values to forward/backward fill. In other words, if there is
7390+
a gap with more than this number of consecutive NaNs, it will only
7391+
be partially filled. If method is not specified, this is the
7392+
maximum number of entries along the entire axis where NaNs will be
7393+
filled. Must be greater than 0 if not None.
7394+
downcast : dict, default is None
7395+
A dict of item->dtype of what to downcast if possible,
7396+
or the string 'infer' which will try to downcast to an appropriate
7397+
equal type (e.g. float64 to int64 if possible).
7398+
73737399
Returns
73747400
-------
73757401
{klass} or None
@@ -7437,7 +7463,7 @@ def pad(
74377463
downcast: dict | None | lib.NoDefault = lib.no_default,
74387464
) -> Self | None:
74397465
"""
7440-
Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
7466+
Fill NA/NaN values by propagating the last valid observation to next valid.
74417467
74427468
.. deprecated:: 2.0
74437469
@@ -7494,7 +7520,10 @@ def bfill(
74947520
...
74957521

74967522
@final
7497-
@doc(klass=_shared_doc_kwargs["klass"])
7523+
@doc(
7524+
klass=_shared_doc_kwargs["klass"],
7525+
axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
7526+
)
74987527
def bfill(
74997528
self,
75007529
*,
@@ -7504,7 +7533,28 @@ def bfill(
75047533
downcast: dict | None | lib.NoDefault = lib.no_default,
75057534
) -> Self | None:
75067535
"""
7507-
Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
7536+
Fill NA/NaN values by using the next valid observation to fill the gap.
7537+
7538+
Parameters
7539+
----------
7540+
axis : {axes_single_arg}
7541+
Axis along which to fill missing values. For `Series`
7542+
this parameter is unused and defaults to 0.
7543+
inplace : bool, default False
7544+
If True, fill in-place. Note: this will modify any
7545+
other views on this object (e.g., a no-copy slice for a column in a
7546+
DataFrame).
7547+
limit : int, default None
7548+
If method is specified, this is the maximum number of consecutive
7549+
NaN values to forward/backward fill. In other words, if there is
7550+
a gap with more than this number of consecutive NaNs, it will only
7551+
be partially filled. If method is not specified, this is the
7552+
maximum number of entries along the entire axis where NaNs will be
7553+
filled. Must be greater than 0 if not None.
7554+
downcast : dict, default is None
7555+
A dict of item->dtype of what to downcast if possible,
7556+
or the string 'infer' which will try to downcast to an appropriate
7557+
equal type (e.g. float64 to int64 if possible).
75087558
75097559
Returns
75107560
-------
@@ -7583,7 +7633,7 @@ def backfill(
75837633
downcast: dict | None | lib.NoDefault = lib.no_default,
75847634
) -> Self | None:
75857635
"""
7586-
Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
7636+
Fill NA/NaN values by using the next valid observation to fill the gap.
75877637
75887638
.. deprecated:: 2.0
75897639

pandas/core/indexes/base.py

+18-30
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
from pandas.core.dtypes.generic import (
125125
ABCDataFrame,
126126
ABCDatetimeIndex,
127+
ABCIntervalIndex,
127128
ABCMultiIndex,
128129
ABCPeriodIndex,
129130
ABCSeries,
@@ -1396,8 +1397,8 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]
13961397

13971398
values = self._values
13981399

1399-
if is_object_dtype(values.dtype):
1400-
values = cast(np.ndarray, values)
1400+
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
1401+
values = np.asarray(values)
14011402
values = lib.maybe_convert_objects(values, safe=True)
14021403

14031404
result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
@@ -3492,8 +3493,6 @@ def _intersection(self, other: Index, sort: bool = False):
34923493
and other.is_monotonic_increasing
34933494
and self._can_use_libjoin
34943495
and other._can_use_libjoin
3495-
and not isinstance(self, ABCMultiIndex)
3496-
and not isinstance(other, ABCMultiIndex)
34973496
):
34983497
try:
34993498
res_indexer, indexer, _ = self._inner_indexer(other)
@@ -4632,28 +4631,13 @@ def join(
46324631

46334632
_validate_join_method(how)
46344633

4635-
if not self.is_unique and not other.is_unique:
4636-
return self._join_non_unique(other, how=how, sort=sort)
4637-
elif not self.is_unique or not other.is_unique:
4638-
if self.is_monotonic_increasing and other.is_monotonic_increasing:
4639-
# Note: 2023-08-15 we *do* have tests that get here with
4640-
# Categorical, string[python] (can use libjoin)
4641-
# and Interval (cannot)
4642-
if self._can_use_libjoin and other._can_use_libjoin:
4643-
# otherwise we will fall through to _join_via_get_indexer
4644-
# GH#39133
4645-
# go through object dtype for ea till engine is supported properly
4646-
return self._join_monotonic(other, how=how)
4647-
else:
4648-
return self._join_non_unique(other, how=how, sort=sort)
4649-
elif (
4650-
# GH48504: exclude MultiIndex to avoid going through MultiIndex._values
4651-
self.is_monotonic_increasing
4634+
if (
4635+
not isinstance(self.dtype, CategoricalDtype)
4636+
and self.is_monotonic_increasing
46524637
and other.is_monotonic_increasing
46534638
and self._can_use_libjoin
46544639
and other._can_use_libjoin
4655-
and not isinstance(self, ABCMultiIndex)
4656-
and not isinstance(self.dtype, CategoricalDtype)
4640+
and (self.is_unique or other.is_unique)
46574641
):
46584642
# Categorical is monotonic if data are ordered as categories, but join can
46594643
# not handle this in case of not lexicographically monotonic GH#38502
@@ -4662,6 +4646,8 @@ def join(
46624646
except TypeError:
46634647
# object dtype; non-comparable objects
46644648
pass
4649+
elif not self.is_unique or not other.is_unique:
4650+
return self._join_non_unique(other, how=how, sort=sort)
46654651

46664652
return self._join_via_get_indexer(other, how, sort)
46674653

@@ -4797,6 +4783,9 @@ def _join_non_unique(
47974783
join_idx = self.take(left_idx)
47984784
right = other.take(right_idx)
47994785
join_index = join_idx.putmask(mask, right)
4786+
if isinstance(join_index, ABCMultiIndex) and how == "outer":
4787+
# test_join_index_levels
4788+
join_index = join_index._sort_levels_monotonic()
48004789
return join_index, left_idx, right_idx
48014790

48024791
@final
@@ -5042,10 +5031,10 @@ def _can_use_libjoin(self) -> bool:
50425031
or isinstance(self._values, (ArrowExtensionArray, BaseMaskedArray))
50435032
or self.dtype == "string[python]"
50445033
)
5045-
# For IntervalIndex, the conversion to numpy converts
5046-
# to object dtype, which negates the performance benefit of libjoin
5047-
# TODO: exclude RangeIndex and MultiIndex as these also make copies?
5048-
return not isinstance(self.dtype, IntervalDtype)
5034+
# Exclude index types where the conversion to numpy converts to object dtype,
5035+
# which negates the performance benefit of libjoin
5036+
# TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
5037+
return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))
50495038

50505039
# --------------------------------------------------------------------
50515040
# Uncategorized Methods
@@ -5180,8 +5169,7 @@ def _get_join_target(self) -> np.ndarray:
51805169
# present
51815170
return self._values.to_numpy()
51825171

5183-
# TODO: exclude ABCRangeIndex, ABCMultiIndex cases here as those create
5184-
# copies.
5172+
# TODO: exclude ABCRangeIndex case here as it copies
51855173
target = self._get_engine_target()
51865174
if not isinstance(target, np.ndarray):
51875175
raise ValueError("_can_use_libjoin should return False.")
@@ -5194,7 +5182,7 @@ def _from_join_target(self, result: np.ndarray) -> ArrayLike:
51945182
"""
51955183
if isinstance(self.values, BaseMaskedArray):
51965184
return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
5197-
elif isinstance(self.values, ArrowExtensionArray):
5185+
elif isinstance(self.values, (ArrowExtensionArray, StringArray)):
51985186
return type(self.values)._from_sequence(result)
51995187
return result
52005188

pandas/core/indexing.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -604,12 +604,12 @@ def at(self) -> _AtIndexer:
604604
Raises
605605
------
606606
KeyError
607-
* If getting a value and 'label' does not exist in a DataFrame or
608-
Series.
607+
If getting a value and 'label' does not exist in a DataFrame or Series.
608+
609609
ValueError
610-
* If row/column label pair is not a tuple or if any label from
611-
the pair is not a scalar for DataFrame.
612-
* If label is list-like (*excluding* NamedTuple) for Series.
610+
If row/column label pair is not a tuple or if any label
611+
from the pair is not a scalar for DataFrame.
612+
If label is list-like (*excluding* NamedTuple) for Series.
613613
614614
See Also
615615
--------

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2008,7 +2008,7 @@ def fillna(
20082008
"need to implement this keyword or an exception will be "
20092009
"raised. In the interim, the keyword is ignored by "
20102010
f"{type(self.values).__name__}.",
2011-
FutureWarning,
2011+
DeprecationWarning,
20122012
stacklevel=find_stack_level(),
20132013
)
20142014

pandas/tests/extension/decimal/test_decimal.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
133133
def test_fillna_frame(self, data_missing):
134134
msg = "ExtensionArray.fillna added a 'copy' keyword"
135135
with tm.assert_produces_warning(
136-
FutureWarning, match=msg, check_stacklevel=False
136+
DeprecationWarning, match=msg, check_stacklevel=False
137137
):
138138
super().test_fillna_frame(data_missing)
139139

@@ -166,7 +166,7 @@ def test_fillna_no_op_returns_copy(self, data):
166166
def test_fillna_series(self, data_missing):
167167
msg = "ExtensionArray.fillna added a 'copy' keyword"
168168
with tm.assert_produces_warning(
169-
FutureWarning, match=msg, check_stacklevel=False
169+
DeprecationWarning, match=msg, check_stacklevel=False
170170
):
171171
super().test_fillna_series(data_missing)
172172

@@ -178,13 +178,13 @@ def test_fillna_series_method(self, data_missing, fillna_method):
178178
super().test_fillna_series_method(data_missing, fillna_method)
179179

180180
def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
181-
warn = FutureWarning if not using_copy_on_write else None
181+
warn = DeprecationWarning if not using_copy_on_write else None
182182
msg = "ExtensionArray.fillna added a 'copy' keyword"
183183
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
184184
super().test_fillna_copy_frame(data_missing)
185185

186186
def test_fillna_copy_series(self, data_missing, using_copy_on_write):
187-
warn = FutureWarning if not using_copy_on_write else None
187+
warn = DeprecationWarning if not using_copy_on_write else None
188188
msg = "ExtensionArray.fillna added a 'copy' keyword"
189189
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
190190
super().test_fillna_copy_series(data_missing)

pandas/tests/frame/test_repr_info.py

+11
Original file line numberDiff line numberDiff line change
@@ -455,3 +455,14 @@ def test_masked_ea_with_formatter(self):
455455
0 0.12 1.00
456456
1 1.12 2.00"""
457457
assert result == expected
458+
459+
def test_repr_ea_columns(self, any_string_dtype):
460+
# GH#54797
461+
pytest.importorskip("pyarrow")
462+
df = DataFrame({"long_column_name": [1, 2, 3], "col2": [4, 5, 6]})
463+
df.columns = df.columns.astype(any_string_dtype)
464+
expected = """ long_column_name col2
465+
0 1 4
466+
1 2 5
467+
2 3 6"""
468+
assert repr(df) == expected

pandas/tests/indexes/test_setops.py

+7
Original file line numberDiff line numberDiff line change
@@ -899,3 +899,10 @@ def test_union_ea_dtypes(self, any_numeric_ea_and_arrow_dtype):
899899
result = idx.union(idx2)
900900
expected = Index([1, 2, 3, 4, 5], dtype=any_numeric_ea_and_arrow_dtype)
901901
tm.assert_index_equal(result, expected)
902+
903+
def test_union_string_array(self, any_string_dtype):
904+
idx1 = Index(["a"], dtype=any_string_dtype)
905+
idx2 = Index(["b"], dtype=any_string_dtype)
906+
result = idx1.union(idx2)
907+
expected = Index(["a", "b"], dtype=any_string_dtype)
908+
tm.assert_index_equal(result, expected)

0 commit comments

Comments
 (0)