
Commit afa6da3

Merge remote-tracking branch 'upstream/master' into styler_format_index
2 parents 1a32d17 + 5018d20 commit afa6da3

File tree: 13 files changed (+150, -71 lines)

doc/source/whatsnew/v1.3.3.rst

+1

@@ -22,6 +22,7 @@ Fixed regressions
 - Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
 - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)
 - Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`)
+- Fixed regression in :meth:`DataFrame.loc.__setitem__` raising ``ValueError`` when setting array as cell value (:issue:`43422`)
 - Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`)
 - Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`)
 - Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)

doc/source/whatsnew/v1.4.0.rst

+2 -1

@@ -361,7 +361,7 @@ Indexing
 - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`)
 - Bug in :meth:`DataFrame.query` did not handle the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a dataframe (:issue:`42826`)
 - Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
--
+- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
 
 
 Missing
@@ -419,6 +419,7 @@ Reshaping
 - :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
 - Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`)
 - Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`)
+- Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`)
 -
 
 Sparse

pandas/core/algorithms.py

+8 -2

@@ -1252,6 +1252,8 @@ class SelectNSeries(SelectN):
 
     def compute(self, method: str) -> Series:
 
+        from pandas.core.reshape.concat import concat
+
         n = self.n
         dtype = self.obj.dtype
         if not self.is_valid_dtype_n_method(dtype):
@@ -1261,6 +1263,7 @@ def compute(self, method: str) -> Series:
             return self.obj[[]]
 
         dropped = self.obj.dropna()
+        nan_index = self.obj.drop(dropped.index)
 
         if is_extension_array_dtype(dropped.dtype):
             # GH#41816 bc we have dropped NAs above, MaskedArrays can use the
@@ -1277,7 +1280,7 @@ def compute(self, method: str) -> Series:
         # slow method
         if n >= len(self.obj):
             ascending = method == "nsmallest"
-            return dropped.sort_values(ascending=ascending).head(n)
+            return self.obj.sort_values(ascending=ascending).head(n)
 
         # fast method
         new_dtype = dropped.dtype
@@ -1295,6 +1298,8 @@ def compute(self, method: str) -> Series:
         if self.keep == "last":
             arr = arr[::-1]
 
+        nbase = n
+        findex = len(self.obj)
         narr = len(arr)
         n = min(n, narr)
 
@@ -1306,12 +1311,13 @@ def compute(self, method: str) -> Series:
 
         if self.keep != "all":
            inds = inds[:n]
+           findex = nbase
 
         if self.keep == "last":
             # reverse indices
             inds = narr - 1 - inds
 
-        return dropped.iloc[inds]
+        return concat([dropped.iloc[inds], nan_index]).iloc[:findex]
 
 
 class SelectNFrame(SelectN):
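
For illustration, a small usage sketch of the behavior this change produces (mirroring the updated Series test further below; assumes a pandas build that includes this commit):

import numpy as np
import pandas as pd

ser = pd.Series([3.0, np.nan, 1, 2, 5])

# When n reaches or exceeds the number of non-NA values, the NaN entries are
# now kept at the end of the result instead of being dropped, so the default
# nlargest()/nsmallest() return all five positions.
ser.nlargest()   # values in order: 5.0, 3.0, 2.0, 1.0, NaN
ser.nsmallest()  # values in order: 1.0, 2.0, 3.0, 5.0, NaN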

pandas/core/arrays/boolean.py

+13

@@ -17,6 +17,7 @@
     ArrayLike,
     AstypeArg,
     Dtype,
+    DtypeObj,
     npt,
     type_t,
 )
@@ -153,6 +154,18 @@ def __from_arrow__(
         else:
             return BooleanArray._concat_same_type(results)
 
+    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
+        # Handle only boolean + np.bool_ -> boolean, since other cases like
+        # Int64 + boolean -> Int64 will be handled by the other type
+        if all(
+            isinstance(t, BooleanDtype)
+            or (isinstance(t, np.dtype) and (np.issubdtype(t, np.bool_)))
+            for t in dtypes
+        ):
+            return BooleanDtype()
+        else:
+            return None
+
 
 def coerce_to_array(
     values, mask=None, copy: bool = False
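
A quick illustration of what the new _get_common_dtype hook enables, mirroring the concat test added below (assumes a pandas build that includes this commit):

import pandas as pd

ser1 = pd.Series([True, False], dtype="boolean")  # nullable BooleanDtype
ser2 = pd.Series([False, True], dtype="bool")     # plain numpy bool

# bool + boolean now resolves to the nullable "boolean" dtype instead of
# falling back to object.
out = pd.concat([ser1, ser2], ignore_index=True)
print(out.dtype)  # boolean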

pandas/core/dtypes/cast.py

+5 -5

@@ -2185,6 +2185,11 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
         # ExtensionBlock._can_hold_element
         return True
 
+    # error: Non-overlapping equality check (left operand type: "dtype[Any]", right
+    # operand type: "Type[object]")
+    if dtype == object:  # type: ignore[comparison-overlap]
+        return True
+
     tipo = maybe_infer_dtype_type(element)
 
     if dtype.kind in ["i", "u"]:
@@ -2232,11 +2237,6 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
             return tipo.kind == "b"
         return lib.is_bool(element)
 
-    # error: Non-overlapping equality check (left operand type: "dtype[Any]", right
-    # operand type: "Type[object]")
-    elif dtype == object:  # type: ignore[comparison-overlap]
-        return True
-
     elif dtype.kind == "S":
         # TODO: test tests.frame.methods.test_replace tests get here,
         # need more targeted tests. xref phofl has a PR about this
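
For context, the user-facing effect of hoisting the object-dtype check ahead of the element inspection, mirroring the new indexing test below (assumes a pandas build that includes this fix):

import numpy as np
import pandas as pd

df = pd.DataFrame(columns=["a", "b"], dtype=object)

# Object-dtype columns can hold arbitrary objects, so assigning array-valued
# cells via .loc no longer raises ValueError (GH#43422).
df.loc[0] = {"a": np.zeros((2,)), "b": np.zeros((2, 2))}
print(df.loc[0, "a"])  # [0. 0.]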

pandas/core/groupby/generic.py

+15 -60

@@ -354,35 +354,17 @@ def array_func(values: ArrayLike) -> ArrayLike:
             )
         return self._reindex_output(ser)
 
-    def _wrap_aggregated_output(
-        self,
-        output: Mapping[base.OutputKey, Series | ArrayLike],
+    def _indexed_output_to_ndframe(
+        self, output: Mapping[base.OutputKey, ArrayLike]
     ) -> Series:
         """
-        Wraps the output of a SeriesGroupBy aggregation into the expected result.
-
-        Parameters
-        ----------
-        output : Mapping[base.OutputKey, Union[Series, ArrayLike]]
-            Data to wrap.
-
-        Returns
-        -------
-        Series
-
-        Notes
-        -----
-        In the vast majority of cases output will only contain one element.
-        The exception is operations that expand dimensions, like ohlc.
+        Wrap the dict result of a GroupBy aggregation into a Series.
         """
         assert len(output) == 1
-
-        name = self.obj.name
-        index = self.grouper.result_index
         values = next(iter(output.values()))
-
-        result = self.obj._constructor(values, index=index, name=name)
-        return self._reindex_output(result)
+        result = self.obj._constructor(values)
+        result.name = self.obj.name
+        return result
 
     def _wrap_transformed_output(
         self, output: Mapping[base.OutputKey, Series | ArrayLike]
@@ -1614,46 +1596,19 @@ def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None:
             if in_axis and name not in columns:
                 result.insert(0, name, lev)
 
-    def _wrap_aggregated_output(
-        self,
-        output: Mapping[base.OutputKey, Series | ArrayLike],
+    def _indexed_output_to_ndframe(
+        self, output: Mapping[base.OutputKey, ArrayLike]
     ) -> DataFrame:
         """
-        Wraps the output of DataFrameGroupBy aggregations into the expected result.
-
-        Parameters
-        ----------
-        output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
-            Data to wrap.
-
-        Returns
-        -------
-        DataFrame
+        Wrap the dict result of a GroupBy aggregation into a DataFrame.
         """
-        if isinstance(output, DataFrame):
-            result = output
-        else:
-            indexed_output = {key.position: val for key, val in output.items()}
-            columns = Index([key.label for key in output])
-            columns._set_names(self._obj_with_exclusions._get_axis(1 - self.axis).names)
-
-            result = self.obj._constructor(indexed_output)
-            result.columns = columns
-
-        if not self.as_index:
-            self._insert_inaxis_grouper_inplace(result)
-            result = result._consolidate()
-        else:
-            result.index = self.grouper.result_index
-
-        if self.axis == 1:
-            result = result.T
-            if result.index.equals(self.obj.index):
-                # Retain e.g. DatetimeIndex/TimedeltaIndex freq
-                result.index = self.obj.index.copy()
-                # TODO: Do this more systematically
+        indexed_output = {key.position: val for key, val in output.items()}
+        columns = Index([key.label for key in output])
+        columns._set_names(self._obj_with_exclusions._get_axis(1 - self.axis).names)
 
-        return self._reindex_output(result)
+        result = self.obj._constructor(indexed_output)
+        result.columns = columns
+        return result
 
     def _wrap_transformed_output(
         self, output: Mapping[base.OutputKey, Series | ArrayLike]
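
The refactor above is behavior-preserving: SeriesGroupBy and DataFrameGroupBy now only build the bare Series/DataFrame, and the shared index/axis/as_index handling moves into GroupBy._wrap_aggregated_output (next file). For reference, a minimal aggregation that exercises this wrapping path:

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})

# Wrapping the per-column aggregation results into the final DataFrame is what
# _indexed_output_to_ndframe / _wrap_aggregated_output are responsible for.
print(df.groupby("key").sum())
#      val
# key
# a      3
# b      3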

pandas/core/groupby/groupby.py

+46 -1

@@ -1095,9 +1095,54 @@ def _set_result_index_ordered(
 
         return result
 
-    def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, ArrayLike]):
+    def _indexed_output_to_ndframe(
+        self, result: Mapping[base.OutputKey, ArrayLike]
+    ) -> Series | DataFrame:
         raise AbstractMethodError(self)
 
+    def _wrap_aggregated_output(
+        self, output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike]
+    ):
+        """
+        Wraps the output of GroupBy aggregations into the expected result.
+
+        Parameters
+        ----------
+        output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike]
+            Data to wrap.
+
+        Returns
+        -------
+        Series or DataFrame
+        """
+
+        if isinstance(output, (Series, DataFrame)):
+            # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce,
+            # in which case our columns are already set correctly.
+            # ATM we do not get here for SeriesGroupBy; when we do, we will
+            # need to require that result.name already match self.obj.name
+            result = output
+        else:
+            result = self._indexed_output_to_ndframe(output)
+
+        if not self.as_index:
+            # `not self.as_index` is only relevant for DataFrameGroupBy,
+            # enforced in __init__
+            self._insert_inaxis_grouper_inplace(result)
+            result = result._consolidate()
+        else:
+            result.index = self.grouper.result_index
+
+        if self.axis == 1:
+            # Only relevant for DataFrameGroupBy, no-op for SeriesGroupBy
+            result = result.T
+            if result.index.equals(self.obj.index):
+                # Retain e.g. DatetimeIndex/TimedeltaIndex freq
+                result.index = self.obj.index.copy()
+                # TODO: Do this more systematically
+
+        return self._reindex_output(result)
+
     def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]):
         raise AbstractMethodError(self)
 
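
In design terms, the change above hoists the common wrapping logic into the GroupBy base class and leaves only the type-specific construction abstract. A minimal, pandas-free sketch of that template-method shape (illustrative only; the names mirror the methods above but nothing else is taken from pandas internals):

from abc import ABC, abstractmethod
from typing import Any, Mapping


class GroupByLike(ABC):
    @abstractmethod
    def _indexed_output_to_ndframe(self, output: Mapping[Any, Any]) -> Any:
        """Subclass hook: build the bare result object from the indexed dict."""

    def _wrap_aggregated_output(self, output: Mapping[Any, Any]) -> Any:
        # Shared template: build the bare object, then apply the common
        # index/axis post-processing in one place.
        result = self._indexed_output_to_ndframe(output)
        # ... set the result index, handle as_index/axis, reindex output ...
        return result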

pandas/tests/frame/indexing/test_indexing.py

+7

@@ -1206,6 +1206,13 @@ def test_getitem_interval_index_partial_indexing(self):
         res = df.loc[:, 0.5]
         tm.assert_series_equal(res, expected)
 
+    def test_setitem_array_as_cell_value(self):
+        # GH#43422
+        df = DataFrame(columns=["a", "b"], dtype=object)
+        df.loc[0] = {"a": np.zeros((2,)), "b": np.zeros((2, 2))}
+        expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]})
+        tm.assert_frame_equal(df, expected)
+
 
 class TestDataFrameIndexingUInt64:
     def test_setitem(self, uint64_frame):

pandas/tests/frame/methods/test_nlargest.py

+7

@@ -209,3 +209,10 @@ def test_nlargest_multiindex_column_lookup(self):
         result = df.nlargest(3, ("x", "b"))
         expected = df.iloc[[3, 2, 1]]
         tm.assert_frame_equal(result, expected)
+
+    def test_nlargest_nan(self):
+        # GH#43060
+        df = pd.DataFrame([np.nan, np.nan, 0, 1, 2, 3])
+        result = df.nlargest(5, 0)
+        expected = df.sort_values(0, ascending=False).head(5)
+        tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_apply.py

+12

@@ -1145,3 +1145,15 @@ def test_doctest_example2():
         {"B": [1.0, 0.0], "C": [2.0, 0.0]}, index=Index(["a", "b"], name="A")
     )
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dropna", [True, False])
+def test_apply_na(dropna):
+    # GH#28984
+    df = DataFrame(
+        {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]}
+    )
+    dfgrp = df.groupby("grp", dropna=dropna)
+    result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z"))
+    expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1))
+    tm.assert_frame_equal(result, expected)

pandas/tests/indexing/multiindex/test_multiindex.py

+10

@@ -119,3 +119,13 @@ def test_multiindex_complex(self):
             ),
         )
         tm.assert_frame_equal(result, expected)
+
+    def test_rename_multiindex_with_duplicates(self):
+        # GH 38015
+        mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
+        df = DataFrame(index=mi)
+        df = df.rename(index={"A": "Apple"}, level=0)
+
+        mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
+        expected = DataFrame(index=mi2)
+        tm.assert_frame_equal(df, expected)
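
For reference, the same rename expressed as a standalone snippet (assumes a pandas version in which GH 38015 is resolved):

import pandas as pd

mi = pd.MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
df = pd.DataFrame(index=mi)

# Renaming a label on level 0 works even though the remaining level
# combinations contain duplicates.
renamed = df.rename(index={"A": "Apple"}, level=0)
print(renamed.index.tolist())
# [('Apple', 'cat'), ('B', 'cat'), ('B', 'cat')]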

pandas/tests/reshape/concat/test_concat.py

+18

@@ -597,6 +597,24 @@ def test_concat_preserves_extension_int64_dtype():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "dtype1,dtype2,expected_dtype",
+    [
+        ("bool", "bool", "bool"),
+        ("boolean", "bool", "boolean"),
+        ("bool", "boolean", "boolean"),
+        ("boolean", "boolean", "boolean"),
+    ],
+)
+def test_concat_bool_types(dtype1, dtype2, expected_dtype):
+    # GH 42800
+    ser1 = Series([True, False], dtype=dtype1)
+    ser2 = Series([False, True], dtype=dtype2)
+    result = concat([ser1, ser2], ignore_index=True)
+    expected = Series([True, False, False, True], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     ("keys", "integrity"),
     [

pandas/tests/series/methods/test_nlargest.py

+6 -2

@@ -127,8 +127,12 @@ def test_nsmallest_nlargest(self, s_main_dtypes_split):
     def test_nlargest_misc(self):
 
         ser = Series([3.0, np.nan, 1, 2, 5])
-        tm.assert_series_equal(ser.nlargest(), ser.iloc[[4, 0, 3, 2]])
-        tm.assert_series_equal(ser.nsmallest(), ser.iloc[[2, 3, 0, 4]])
+        result = ser.nlargest()
+        expected = ser.iloc[[4, 0, 3, 2, 1]]
+        tm.assert_series_equal(result, expected)
+        result = ser.nsmallest()
+        expected = ser.iloc[[2, 3, 0, 4, 1]]
+        tm.assert_series_equal(result, expected)
 
         msg = 'keep must be either "first", "last"'
         with pytest.raises(ValueError, match=msg):
