From 8f57ee79b859a0852b6a301b8440f4a1c060b024 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 8 May 2025 17:11:51 +0000
Subject: [PATCH 1/2] feat: Add inplace arg support to sort methods

---
 bigframes/core/indexes/base.py                |  8 ++-
 bigframes/dataframe.py                        | 69 +++++++++++++++++--
 bigframes/series.py                           | 60 ++++++++++++++--
 tests/system/small/test_dataframe.py          | 37 ++++++++++
 tests/system/small/test_series.py             | 25 +++++++
 .../bigframes_vendored/pandas/core/frame.py   | 26 +++++--
 .../bigframes_vendored/pandas/core/series.py  |  6 ++
 7 files changed, 216 insertions(+), 15 deletions(-)

diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py
index 71dc914ed4..eac1f58eae 100644
--- a/bigframes/core/indexes/base.py
+++ b/bigframes/core/indexes/base.py
@@ -298,7 +298,13 @@ def _memory_usage(self) -> int:
     def transpose(self) -> Index:
         return self
 
-    def sort_values(self, *, ascending: bool = True, na_position: str = "last"):
+    def sort_values(
+        self,
+        *,
+        inplace: bool = False,
+        ascending: bool = True,
+        na_position: str = "last",
+    ) -> Index:
         if na_position not in ["first", "last"]:
             raise ValueError("Param na_position must be one of 'first' or 'last'")
         na_last = na_position == "last"
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 8e9794a5e3..d520a4a36f 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2234,10 +2234,34 @@ def set_index(
         col_ids_strs: List[str] = [col_id for col_id in col_ids if col_id is not None]
         return DataFrame(self._block.set_index(col_ids_strs, append=append, drop=drop))
 
-    @validations.requires_index
+    @overload
     def sort_index(
-        self, ascending: bool = True, na_position: Literal["first", "last"] = "last"
+        self,
+        *,
+        ascending: bool = ...,
+        inplace: Literal[False] = ...,
+        na_position: Literal["first", "last"] = ...,
     ) -> DataFrame:
+        ...
+
+    @overload
+    def sort_index(
+        self,
+        *,
+        ascending: bool = ...,
+        inplace: Literal[True] = ...,
+        na_position: Literal["first", "last"] = ...,
+    ) -> None:
+        ...
+
+    @validations.requires_index
+    def sort_index(
+        self,
+        *,
+        ascending: bool = True,
+        inplace: bool = False,
+        na_position: Literal["first", "last"] = "last",
+    ) -> Optional[DataFrame]:
         if na_position not in ["first", "last"]:
             raise ValueError("Param na_position must be one of 'first' or 'last'")
         na_last = na_position == "last"
@@ -2248,16 +2272,46 @@ def sort_index(
             else order.descending_over(column, na_last)
             for column in index_columns
         ]
-        return DataFrame(self._block.order_by(ordering))
+        block = self._block.order_by(ordering)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return DataFrame(block)
 
+    @overload
     def sort_values(
         self,
         by: str | typing.Sequence[str],
         *,
+        inplace: Literal[False] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
+    ) -> DataFrame:
+        ...
+
+    @overload
+    def sort_values(
+        self,
+        by: str | typing.Sequence[str],
+        *,
+        inplace: Literal[True] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
+    ) -> None:
+        ...
+
+    def sort_values(
+        self,
+        by: str | typing.Sequence[str],
+        *,
+        inplace: bool = False,
         ascending: bool | typing.Sequence[bool] = True,
         kind: str = "quicksort",
         na_position: typing.Literal["first", "last"] = "last",
-    ) -> DataFrame:
+    ) -> Optional[DataFrame]:
         if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)):
             raise KeyError(
                 f"Invalid key type: {type(by).__name__}. Please provide valid column name(s)."
@@ -2287,7 +2341,12 @@ def sort_values(
                 if is_ascending
                 else order.descending_over(column_id, na_last)
             )
-        return DataFrame(self._block.order_by(ordering))
+        block = self._block.order_by(ordering)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return DataFrame(block)
 
     def eval(self, expr: str) -> DataFrame:
         import bigframes.core.eval as bf_eval
diff --git a/bigframes/series.py b/bigframes/series.py
index 37a3723a0a..7b69d0176a 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1538,9 +1538,39 @@ def value_counts(
         )
         return Series(block)
 
+    @typing.overload
     def sort_values(
-        self, *, axis=0, ascending=True, kind: str = "quicksort", na_position="last"
+        self,
+        *,
+        axis=...,
+        inplace: Literal[True] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
+    ) -> None:
+        ...
+
+    @typing.overload
+    def sort_values(
+        self,
+        *,
+        axis=...,
+        inplace: Literal[False] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
     ) -> Series:
+        ...
+
+    def sort_values(
+        self,
+        *,
+        axis=0,
+        inplace: bool = False,
+        ascending=True,
+        kind: str = "quicksort",
+        na_position: typing.Literal["first", "last"] = "last",
+    ) -> Optional[Series]:
         if axis != 0 and axis != "index":
             raise ValueError(f"No axis named {axis} for object type Series")
         if na_position not in ["first", "last"]:
@@ -1552,10 +1582,28 @@ def sort_values(
                 else order.descending_over(self._value_column, (na_position == "last"))
             ],
         )
-        return Series(block)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return Series(block)
+
+    @typing.overload
+    def sort_index(
+        self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
+    ) -> Series:
+        ...
+
+    @typing.overload
+    def sort_index(
+        self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
+    ) -> None:
+        ...
 
     @validations.requires_index
-    def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
+    def sort_index(
+        self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
+    ) -> Optional[Series]:
         # TODO(tbergeron): Support level parameter once multi-index introduced.
         if axis != 0 and axis != "index":
             raise ValueError(f"No axis named {axis} for object type Series")
@@ -1570,7 +1618,11 @@ def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
             for column in block.index_columns
         ]
         block = block.order_by(ordering)
-        return Series(block)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return Series(block)
 
     @validations.requires_ordering()
     def rolling(
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 362d736aeb..ce291d4999 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2035,6 +2035,17 @@ def test_sort_index(scalars_dfs, ascending, na_position):
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
+def test_dataframe_sort_index_inplace(scalars_dfs):
+    index_column = "int64_col"
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df = scalars_df.copy().set_index(index_column)
+    df.sort_index(ascending=False, inplace=True)
+    bf_result = df.to_pandas()
+
+    pd_result = scalars_pandas_df.set_index(index_column).sort_index(ascending=False)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
 def test_df_abs(scalars_dfs_maybe_ordered):
     scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
     columns = ["int64_col", "int64_too", "float64_col"]
@@ -2817,6 +2828,32 @@ def test_dataframe_sort_values(
     )
 
 
+@pytest.mark.parametrize(
+    ("by", "ascending", "na_position"),
+    [
+        ("int64_col", True, "first"),
+        (["bool_col", "int64_col"], True, "last"),
+    ],
+)
+def test_dataframe_sort_values_inplace(
+    scalars_df_index, scalars_pandas_df_index, by, ascending, na_position
+):
+    # Test needs values to be unique
+    bf_sorted = scalars_df_index.copy()
+    bf_sorted.sort_values(
+        by, ascending=ascending, na_position=na_position, inplace=True
+    )
+    bf_result = bf_sorted.to_pandas()
+    pd_result = scalars_pandas_df_index.sort_values(
+        by, ascending=ascending, na_position=na_position
+    )
+
+    pandas.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 def test_dataframe_sort_values_invalid_input(scalars_df_index):
     with pytest.raises(KeyError):
         scalars_df_index.sort_values(by=scalars_df_index["int64_col"])
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index c63bf8e12b..3852c417fa 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3339,6 +3339,19 @@ def test_sort_values(scalars_df_index, scalars_pandas_df_index, ascending, na_po
     )
 
 
+def test_series_sort_values_inplace(scalars_df_index, scalars_pandas_df_index):
+    # Test needs values to be unique
+    bf_series = scalars_df_index["int64_col"].copy()
+    bf_series.sort_values(ascending=False, inplace=True)
+    bf_result = bf_series.to_pandas()
+    pd_result = scalars_pandas_df_index["int64_col"].sort_values(ascending=False)
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 @pytest.mark.parametrize(
     ("ascending"),
     [
@@ -3358,6 +3371,18 @@ def test_sort_index(scalars_df_index, scalars_pandas_df_index, ascending):
     )
 
 
+def test_series_sort_index_inplace(scalars_df_index, scalars_pandas_df_index):
+    bf_series = scalars_df_index["int64_too"].copy()
+    bf_series.sort_index(ascending=False, inplace=True)
+    bf_result = bf_series.to_pandas()
+    pd_result = scalars_pandas_df_index["int64_too"].sort_index(ascending=False)
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 def test_mask_default_value(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 8f3e150606..15b318fbda 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2213,10 +2213,11 @@ def sort_values(
         self,
         by: str | Sequence[str],
         *,
+        inplace: bool = False,
         ascending: bool | Sequence[bool] = True,
         kind: str = "quicksort",
         na_position="last",
-    ) -> DataFrame:
+    ) -> Optional[DataFrame]:
         """Sort by the values along row axis.
 
         **Examples:**
@@ -2300,6 +2301,8 @@ def sort_values(
                 Sort ascending vs. descending. Specify list for multiple sort
                 orders.  If this is a list of bools, must match the length of
                 the by.
+            inplace (bool, default False):
+                If True, perform operation in-place.
             kind (str, default 'quicksort'):
                 Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
                 'heapsort', 'stable'. Ignored except when determining whether to
@@ -2309,8 +2312,8 @@ def sort_values(
              if `first`; `last` puts NaNs at the end.
 
         Returns:
-            bigframes.pandas.DataFrame:
-                DataFrame with sorted values.
+            bigframes.pandas.DataFram or None:
+                DataFrame with sorted values or None if inplace=True.
 
         Raises:
             ValueError:
@@ -2320,12 +2323,25 @@ def sort_values(
 
     def sort_index(
         self,
-    ) -> DataFrame:
+        *,
+        ascending: bool = True,
+        inplace: bool = False,
+        na_position: Literal["first", "last"] = "last",
+    ) -> Optional[DataFrame]:
         """Sort object by labels (along an axis).
 
+        Args:
+            ascending (bool, default True)
+                Sort ascending vs. descending.
+            inplace (bool, default False):
+                Whether to modify the DataFrame rather than creating a new one.
+            na_position ({'first', 'last'}, default 'last'):
+                Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
+                Not implemented for MultiIndex.
+
         Returns:
             bigframes.pandas.DataFrame:
-                The original DataFrame sorted by the labels.
+                DataFrame with sorted values or None if inplace=True.
 
         Raises:
             ValueError:
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index a2d0983652..1d42a8fca2 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1504,6 +1504,7 @@ def sort_values(
         self,
         *,
         axis: Axis = 0,
+        inplace: bool = False,
         ascending: bool | int | Sequence[bool] | Sequence[int] = True,
         kind: str = "quicksort",
         na_position: str = "last",
@@ -1581,6 +1582,8 @@ def sort_values(
         Args:
             axis (0 or 'index'):
                 Unused. Parameter needed for compatibility with DataFrame.
+            inplace (bool, default False):
+                Whether to modify the Series rather than creating a new one.
             ascending (bool or list of bools, default True):
                 If True, sort values in ascending order, otherwise descending.
             kind (str, default to 'quicksort'):
@@ -1601,6 +1604,7 @@ def sort_index(
         self,
         *,
         axis: Axis = 0,
+        inplace: bool = False,
         ascending: bool | Sequence[bool] = True,
         na_position: NaPosition = "last",
     ) -> Series | None:
@@ -1647,6 +1651,8 @@ def sort_index(
         Args:
             axis ({0 or 'index'}):
                 Unused. Parameter needed for compatibility with DataFrame.
+            inplace (bool, default False):
+                Whether to modify the Series rather than creating a new one.
             ascending (bool or list-like of bools, default True):
                 Sort ascending vs. descending. When the index is a MultiIndex the
                 sort direction can be controlled for each level individually.

From 4f08fc7f5fee9b6d603b51d5bc057a6507e65b97 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 8 May 2025 17:51:40 +0000
Subject: [PATCH 2/2] mypy overloads

---
 bigframes/dataframe.py                               | 4 ++--
 bigframes/ml/metrics/_metrics.py                     | 4 ++--
 bigframes/series.py                                  | 4 ++--
 tests/system/small/test_session.py                   | 9 +++++----
 third_party/bigframes_vendored/pandas/core/frame.py  | 6 +++---
 third_party/bigframes_vendored/pandas/core/series.py | 4 ++--
 6 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index d520a4a36f..8ed749138c 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2234,7 +2234,7 @@ def set_index(
         col_ids_strs: List[str] = [col_id for col_id in col_ids if col_id is not None]
         return DataFrame(self._block.set_index(col_ids_strs, append=append, drop=drop))
 
-    @overload
+    @overload  # type: ignore[override]
     def sort_index(
         self,
         *,
@@ -2279,7 +2279,7 @@ def sort_index(
         else:
             return DataFrame(block)
 
-    @overload
+    @overload  # type: ignore[override]
     def sort_values(
         self,
         by: str | typing.Sequence[str],
diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py
index 658818b261..d7591ef011 100644
--- a/bigframes/ml/metrics/_metrics.py
+++ b/bigframes/ml/metrics/_metrics.py
@@ -240,7 +240,7 @@ def recall_score(
     unique_labels = (
         bpd.concat([y_true_series, y_pred_series], join="outer")
         .drop_duplicates()
-        .sort_values()
+        .sort_values(inplace=False)
     )
     index = unique_labels.to_list()
 
@@ -277,7 +277,7 @@ def precision_score(
     unique_labels = (
         bpd.concat([y_true_series, y_pred_series], join="outer")
         .drop_duplicates()
-        .sort_values()
+        .sort_values(inplace=False)
     )
     index = unique_labels.to_list()
 
diff --git a/bigframes/series.py b/bigframes/series.py
index 7b69d0176a..1e29671310 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1538,7 +1538,7 @@ def value_counts(
         )
         return Series(block)
 
-    @typing.overload
+    @typing.overload  # type: ignore[override]
     def sort_values(
         self,
         *,
@@ -1588,7 +1588,7 @@ def sort_values(
         else:
             return Series(block)
 
-    @typing.overload
+    @typing.overload  # type: ignore[override]
     def sort_index(
         self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
     ) -> Series:
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index ab460d5bc9..eeb242e8da 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -96,16 +96,17 @@ def test_read_gbq_tokyo(
     tokyo_location: str,
 ):
     df = session_tokyo.read_gbq(scalars_table_tokyo, index_col=["rowindex"])
-    result = df.sort_index().to_pandas()
+    df.sort_index(inplace=True)
     expected = scalars_pandas_df_index
 
     # use_explicit_destination=True, otherwise might use path with no query_job
-    result = session_tokyo._executor.execute(
+    exec_result = session_tokyo._executor.execute(
         df._block.expr, use_explicit_destination=True
     )
-    assert result.query_job.location == tokyo_location
+    assert exec_result.query_job is not None
+    assert exec_result.query_job.location == tokyo_location
 
-    assert len(expected) == result.total_rows
+    assert len(expected) == exec_result.total_rows
 
 
 @pytest.mark.parametrize(
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 15b318fbda..5bbf72b421 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2216,8 +2216,8 @@ def sort_values(
         inplace: bool = False,
         ascending: bool | Sequence[bool] = True,
         kind: str = "quicksort",
-        na_position="last",
-    ) -> Optional[DataFrame]:
+        na_position: Literal["first", "last"] = "last",
+    ):
         """Sort by the values along row axis.
 
         **Examples:**
@@ -2327,7 +2327,7 @@ def sort_index(
         ascending: bool = True,
         inplace: bool = False,
         na_position: Literal["first", "last"] = "last",
-    ) -> Optional[DataFrame]:
+    ):
         """Sort object by labels (along an axis).
 
         Args:
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 1d42a8fca2..8164fa7415 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1508,7 +1508,7 @@ def sort_values(
         ascending: bool | int | Sequence[bool] | Sequence[int] = True,
         kind: str = "quicksort",
         na_position: str = "last",
-    ) -> Series | None:
+    ):
         """
         Sort by the values.
 
@@ -1607,7 +1607,7 @@ def sort_index(
         inplace: bool = False,
         ascending: bool | Sequence[bool] = True,
         na_position: NaPosition = "last",
-    ) -> Series | None:
+    ):
         """
         Sort Series by index labels.