Skip to content

Commit d1ccb52

Browse files
feat: Add inplace arg support to sort methods (#1710)
1 parent 3f79528 commit d1ccb52

File tree

9 files changed

+226
-24
lines changed

9 files changed

+226
-24
lines changed

bigframes/core/indexes/base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,13 @@ def _memory_usage(self) -> int:
298298
def transpose(self) -> Index:
299299
return self
300300

301-
def sort_values(self, *, ascending: bool = True, na_position: str = "last"):
301+
def sort_values(
302+
self,
303+
*,
304+
inplace: bool = False,
305+
ascending: bool = True,
306+
na_position: str = "last",
307+
) -> Index:
302308
if na_position not in ["first", "last"]:
303309
raise ValueError("Param na_position must be one of 'first' or 'last'")
304310
na_last = na_position == "last"

bigframes/dataframe.py

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2234,10 +2234,34 @@ def set_index(
22342234
col_ids_strs: List[str] = [col_id for col_id in col_ids if col_id is not None]
22352235
return DataFrame(self._block.set_index(col_ids_strs, append=append, drop=drop))
22362236

2237-
@validations.requires_index
2237+
@overload # type: ignore[override]
22382238
def sort_index(
2239-
self, ascending: bool = True, na_position: Literal["first", "last"] = "last"
2239+
self,
2240+
*,
2241+
ascending: bool = ...,
2242+
inplace: Literal[False] = ...,
2243+
na_position: Literal["first", "last"] = ...,
22402244
) -> DataFrame:
2245+
...
2246+
2247+
@overload
2248+
def sort_index(
2249+
self,
2250+
*,
2251+
ascending: bool = ...,
2252+
inplace: Literal[True] = ...,
2253+
na_position: Literal["first", "last"] = ...,
2254+
) -> None:
2255+
...
2256+
2257+
@validations.requires_index
2258+
def sort_index(
2259+
self,
2260+
*,
2261+
ascending: bool = True,
2262+
inplace: bool = False,
2263+
na_position: Literal["first", "last"] = "last",
2264+
) -> Optional[DataFrame]:
22412265
if na_position not in ["first", "last"]:
22422266
raise ValueError("Param na_position must be one of 'first' or 'last'")
22432267
na_last = na_position == "last"
@@ -2248,16 +2272,46 @@ def sort_index(
22482272
else order.descending_over(column, na_last)
22492273
for column in index_columns
22502274
]
2251-
return DataFrame(self._block.order_by(ordering))
2275+
block = self._block.order_by(ordering)
2276+
if inplace:
2277+
self._set_block(block)
2278+
return None
2279+
else:
2280+
return DataFrame(block)
22522281

2282+
@overload # type: ignore[override]
22532283
def sort_values(
22542284
self,
22552285
by: str | typing.Sequence[str],
22562286
*,
2287+
inplace: Literal[False] = ...,
2288+
ascending: bool | typing.Sequence[bool] = ...,
2289+
kind: str = ...,
2290+
na_position: typing.Literal["first", "last"] = ...,
2291+
) -> DataFrame:
2292+
...
2293+
2294+
@overload
2295+
def sort_values(
2296+
self,
2297+
by: str | typing.Sequence[str],
2298+
*,
2299+
inplace: Literal[True] = ...,
2300+
ascending: bool | typing.Sequence[bool] = ...,
2301+
kind: str = ...,
2302+
na_position: typing.Literal["first", "last"] = ...,
2303+
) -> None:
2304+
...
2305+
2306+
def sort_values(
2307+
self,
2308+
by: str | typing.Sequence[str],
2309+
*,
2310+
inplace: bool = False,
22572311
ascending: bool | typing.Sequence[bool] = True,
22582312
kind: str = "quicksort",
22592313
na_position: typing.Literal["first", "last"] = "last",
2260-
) -> DataFrame:
2314+
) -> Optional[DataFrame]:
22612315
if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)):
22622316
raise KeyError(
22632317
f"Invalid key type: {type(by).__name__}. Please provide valid column name(s)."
@@ -2287,7 +2341,12 @@ def sort_values(
22872341
if is_ascending
22882342
else order.descending_over(column_id, na_last)
22892343
)
2290-
return DataFrame(self._block.order_by(ordering))
2344+
block = self._block.order_by(ordering)
2345+
if inplace:
2346+
self._set_block(block)
2347+
return None
2348+
else:
2349+
return DataFrame(block)
22912350

22922351
def eval(self, expr: str) -> DataFrame:
22932352
import bigframes.core.eval as bf_eval

bigframes/ml/metrics/_metrics.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def recall_score(
240240
unique_labels = (
241241
bpd.concat([y_true_series, y_pred_series], join="outer")
242242
.drop_duplicates()
243-
.sort_values()
243+
.sort_values(inplace=False)
244244
)
245245
index = unique_labels.to_list()
246246

@@ -277,7 +277,7 @@ def precision_score(
277277
unique_labels = (
278278
bpd.concat([y_true_series, y_pred_series], join="outer")
279279
.drop_duplicates()
280-
.sort_values()
280+
.sort_values(inplace=False)
281281
)
282282
index = unique_labels.to_list()
283283

bigframes/series.py

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1538,9 +1538,39 @@ def value_counts(
15381538
)
15391539
return Series(block)
15401540

1541+
@typing.overload # type: ignore[override]
15411542
def sort_values(
1542-
self, *, axis=0, ascending=True, kind: str = "quicksort", na_position="last"
1543+
self,
1544+
*,
1545+
axis=...,
1546+
inplace: Literal[True] = ...,
1547+
ascending: bool | typing.Sequence[bool] = ...,
1548+
kind: str = ...,
1549+
na_position: typing.Literal["first", "last"] = ...,
1550+
) -> None:
1551+
...
1552+
1553+
@typing.overload
1554+
def sort_values(
1555+
self,
1556+
*,
1557+
axis=...,
1558+
inplace: Literal[False] = ...,
1559+
ascending: bool | typing.Sequence[bool] = ...,
1560+
kind: str = ...,
1561+
na_position: typing.Literal["first", "last"] = ...,
15431562
) -> Series:
1563+
...
1564+
1565+
def sort_values(
1566+
self,
1567+
*,
1568+
axis=0,
1569+
inplace: bool = False,
1570+
ascending=True,
1571+
kind: str = "quicksort",
1572+
na_position: typing.Literal["first", "last"] = "last",
1573+
) -> Optional[Series]:
15441574
if axis != 0 and axis != "index":
15451575
raise ValueError(f"No axis named {axis} for object type Series")
15461576
if na_position not in ["first", "last"]:
@@ -1552,10 +1582,28 @@ def sort_values(
15521582
else order.descending_over(self._value_column, (na_position == "last"))
15531583
],
15541584
)
1555-
return Series(block)
1585+
if inplace:
1586+
self._set_block(block)
1587+
return None
1588+
else:
1589+
return Series(block)
1590+
1591+
@typing.overload # type: ignore[override]
1592+
def sort_index(
1593+
self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
1594+
) -> Series:
1595+
...
1596+
1597+
@typing.overload
1598+
def sort_index(
1599+
self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
1600+
) -> None:
1601+
...
15561602

15571603
@validations.requires_index
1558-
def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
1604+
def sort_index(
1605+
self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
1606+
) -> Optional[Series]:
15591607
# TODO(tbergeron): Support level parameter once multi-index introduced.
15601608
if axis != 0 and axis != "index":
15611609
raise ValueError(f"No axis named {axis} for object type Series")
@@ -1570,7 +1618,11 @@ def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
15701618
for column in block.index_columns
15711619
]
15721620
block = block.order_by(ordering)
1573-
return Series(block)
1621+
if inplace:
1622+
self._set_block(block)
1623+
return None
1624+
else:
1625+
return Series(block)
15741626

15751627
@validations.requires_ordering()
15761628
def rolling(

tests/system/small/test_dataframe.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2035,6 +2035,17 @@ def test_sort_index(scalars_dfs, ascending, na_position):
20352035
pandas.testing.assert_frame_equal(bf_result, pd_result)
20362036

20372037

2038+
def test_dataframe_sort_index_inplace(scalars_dfs):
2039+
index_column = "int64_col"
2040+
scalars_df, scalars_pandas_df = scalars_dfs
2041+
df = scalars_df.copy().set_index(index_column)
2042+
df.sort_index(ascending=False, inplace=True)
2043+
bf_result = df.to_pandas()
2044+
2045+
pd_result = scalars_pandas_df.set_index(index_column).sort_index(ascending=False)
2046+
pandas.testing.assert_frame_equal(bf_result, pd_result)
2047+
2048+
20382049
def test_df_abs(scalars_dfs_maybe_ordered):
20392050
scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
20402051
columns = ["int64_col", "int64_too", "float64_col"]
@@ -2817,6 +2828,32 @@ def test_dataframe_sort_values(
28172828
)
28182829

28192830

2831+
@pytest.mark.parametrize(
2832+
("by", "ascending", "na_position"),
2833+
[
2834+
("int64_col", True, "first"),
2835+
(["bool_col", "int64_col"], True, "last"),
2836+
],
2837+
)
2838+
def test_dataframe_sort_values_inplace(
2839+
scalars_df_index, scalars_pandas_df_index, by, ascending, na_position
2840+
):
2841+
# Test needs values to be unique
2842+
bf_sorted = scalars_df_index.copy()
2843+
bf_sorted.sort_values(
2844+
by, ascending=ascending, na_position=na_position, inplace=True
2845+
)
2846+
bf_result = bf_sorted.to_pandas()
2847+
pd_result = scalars_pandas_df_index.sort_values(
2848+
by, ascending=ascending, na_position=na_position
2849+
)
2850+
2851+
pandas.testing.assert_frame_equal(
2852+
bf_result,
2853+
pd_result,
2854+
)
2855+
2856+
28202857
def test_dataframe_sort_values_invalid_input(scalars_df_index):
28212858
with pytest.raises(KeyError):
28222859
scalars_df_index.sort_values(by=scalars_df_index["int64_col"])

tests/system/small/test_series.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3339,6 +3339,19 @@ def test_sort_values(scalars_df_index, scalars_pandas_df_index, ascending, na_po
33393339
)
33403340

33413341

3342+
def test_series_sort_values_inplace(scalars_df_index, scalars_pandas_df_index):
3343+
# Test needs values to be unique
3344+
bf_series = scalars_df_index["int64_col"].copy()
3345+
bf_series.sort_values(ascending=False, inplace=True)
3346+
bf_result = bf_series.to_pandas()
3347+
pd_result = scalars_pandas_df_index["int64_col"].sort_values(ascending=False)
3348+
3349+
pd.testing.assert_series_equal(
3350+
bf_result,
3351+
pd_result,
3352+
)
3353+
3354+
33423355
@pytest.mark.parametrize(
33433356
("ascending"),
33443357
[
@@ -3358,6 +3371,18 @@ def test_sort_index(scalars_df_index, scalars_pandas_df_index, ascending):
33583371
)
33593372

33603373

3374+
def test_series_sort_index_inplace(scalars_df_index, scalars_pandas_df_index):
3375+
bf_series = scalars_df_index["int64_too"].copy()
3376+
bf_series.sort_index(ascending=False, inplace=True)
3377+
bf_result = bf_series.to_pandas()
3378+
pd_result = scalars_pandas_df_index["int64_too"].sort_index(ascending=False)
3379+
3380+
pd.testing.assert_series_equal(
3381+
bf_result,
3382+
pd_result,
3383+
)
3384+
3385+
33613386
def test_mask_default_value(scalars_dfs):
33623387
scalars_df, scalars_pandas_df = scalars_dfs
33633388

tests/system/small/test_session.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,17 @@ def test_read_gbq_tokyo(
9696
tokyo_location: str,
9797
):
9898
df = session_tokyo.read_gbq(scalars_table_tokyo, index_col=["rowindex"])
99-
result = df.sort_index().to_pandas()
99+
df.sort_index(inplace=True)
100100
expected = scalars_pandas_df_index
101101

102102
# use_explicit_destination=True, otherwise might use path with no query_job
103-
result = session_tokyo._executor.execute(
103+
exec_result = session_tokyo._executor.execute(
104104
df._block.expr, use_explicit_destination=True
105105
)
106-
assert result.query_job.location == tokyo_location
106+
assert exec_result.query_job is not None
107+
assert exec_result.query_job.location == tokyo_location
107108

108-
assert len(expected) == result.total_rows
109+
assert len(expected) == exec_result.total_rows
109110

110111

111112
@pytest.mark.parametrize(

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,10 +2213,11 @@ def sort_values(
22132213
self,
22142214
by: str | Sequence[str],
22152215
*,
2216+
inplace: bool = False,
22162217
ascending: bool | Sequence[bool] = True,
22172218
kind: str = "quicksort",
2218-
na_position="last",
2219-
) -> DataFrame:
2219+
na_position: Literal["first", "last"] = "last",
2220+
):
22202221
"""Sort by the values along row axis.
22212222
22222223
**Examples:**
@@ -2300,6 +2301,8 @@ def sort_values(
23002301
Sort ascending vs. descending. Specify list for multiple sort
23012302
orders. If this is a list of bools, must match the length of
23022303
the by.
2304+
inplace (bool, default False):
2305+
If True, perform operation in-place.
23032306
kind (str, default 'quicksort'):
23042307
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
23052308
'heapsort', 'stable'. Ignored except when determining whether to
@@ -2309,8 +2312,8 @@ def sort_values(
23092312
if `first`; `last` puts NaNs at the end.
23102313
23112314
Returns:
2312-
bigframes.pandas.DataFrame:
2313-
DataFrame with sorted values.
2315+
bigframes.pandas.DataFrame or None:
2316+
DataFrame with sorted values or None if inplace=True.
23142317
23152318
Raises:
23162319
ValueError:
@@ -2320,12 +2323,25 @@ def sort_values(
23202323

23212324
def sort_index(
23222325
self,
2323-
) -> DataFrame:
2326+
*,
2327+
ascending: bool = True,
2328+
inplace: bool = False,
2329+
na_position: Literal["first", "last"] = "last",
2330+
):
23242331
"""Sort object by labels (along an axis).
23252332
2333+
Args:
2334+
ascending (bool, default True):
2335+
Sort ascending vs. descending.
2336+
inplace (bool, default False):
2337+
Whether to modify the DataFrame rather than creating a new one.
2338+
na_position ({'first', 'last'}, default 'last'):
2339+
Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
2340+
Not implemented for MultiIndex.
2341+
23262342
Returns:
23272343
bigframes.pandas.DataFrame or None:
2328-
The original DataFrame sorted by the labels.
2344+
DataFrame sorted by the labels, or None if inplace=True.
23292345
23302346
Raises:
23312347
ValueError:

0 commit comments

Comments
 (0)