googleapis · TrevorBergeron · May 8, 2025 · May 8, 2025 · May 8, 2025 · chelsea-lin
@@ -298,7 +298,13 @@ def _memory_usage(self) -> int:
     def transpose(self) -> Index:
         return self
 
-    def sort_values(self, *, ascending: bool = True, na_position: str = "last"):
+    def sort_values(
+        self,
+        *,
+        inplace: bool = False,
+        ascending: bool = True,
+        na_position: str = "last",
+    ) -> Index:
         if na_position not in ["first", "last"]:
             raise ValueError("Param na_position must be one of 'first' or 'last'")
         na_last = na_position == "last"

@@ -2234,10 +2234,34 @@ def set_index(
         col_ids_strs: List[str] = [col_id for col_id in col_ids if col_id is not None]
         return DataFrame(self._block.set_index(col_ids_strs, append=append, drop=drop))
 
-    @validations.requires_index
+    @overload  # type: ignore[override]
     def sort_index(
-        self, ascending: bool = True, na_position: Literal["first", "last"] = "last"
+        self,
+        *,
+        ascending: bool = ...,
+        inplace: Literal[False] = ...,
+        na_position: Literal["first", "last"] = ...,
     ) -> DataFrame:
+        ...
+
+    @overload
+    def sort_index(
+        self,
+        *,
+        ascending: bool = ...,
+        inplace: Literal[True] = ...,
+        na_position: Literal["first", "last"] = ...,
+    ) -> None:
+        ...
+
+    @validations.requires_index
+    def sort_index(
+        self,
+        *,
+        ascending: bool = True,
+        inplace: bool = False,
+        na_position: Literal["first", "last"] = "last",
+    ) -> Optional[DataFrame]:
         if na_position not in ["first", "last"]:
             raise ValueError("Param na_position must be one of 'first' or 'last'")
         na_last = na_position == "last"
@@ -2248,16 +2272,46 @@ def sort_index(
             else order.descending_over(column, na_last)
             for column in index_columns
         ]
-        return DataFrame(self._block.order_by(ordering))
+        block = self._block.order_by(ordering)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return DataFrame(block)
 
+    @overload  # type: ignore[override]
     def sort_values(
         self,
         by: str | typing.Sequence[str],
         *,
+        inplace: Literal[False] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
+    ) -> DataFrame:
+        ...
+
+    @overload
+    def sort_values(
+        self,
+        by: str | typing.Sequence[str],
+        *,
+        inplace: Literal[True] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
+    ) -> None:
+        ...
+
+    def sort_values(
+        self,
+        by: str | typing.Sequence[str],
+        *,
+        inplace: bool = False,
         ascending: bool | typing.Sequence[bool] = True,
         kind: str = "quicksort",
         na_position: typing.Literal["first", "last"] = "last",
-    ) -> DataFrame:
+    ) -> Optional[DataFrame]:
         if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)):
             raise KeyError(
                 f"Invalid key type: {type(by).__name__}. Please provide valid column name(s)."
@@ -2287,7 +2341,12 @@ def sort_values(
                 if is_ascending
                 else order.descending_over(column_id, na_last)
             )
-        return DataFrame(self._block.order_by(ordering))
+        block = self._block.order_by(ordering)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return DataFrame(block)
 
     def eval(self, expr: str) -> DataFrame:
         import bigframes.core.eval as bf_eval

@@ -240,7 +240,7 @@ def recall_score(
     unique_labels = (
         bpd.concat([y_true_series, y_pred_series], join="outer")
         .drop_duplicates()
-        .sort_values()
+        .sort_values(inplace=False)
     )
     index = unique_labels.to_list()
 
@@ -277,7 +277,7 @@ def precision_score(
     unique_labels = (
         bpd.concat([y_true_series, y_pred_series], join="outer")
         .drop_duplicates()
-        .sort_values()
+        .sort_values(inplace=False)
     )
     index = unique_labels.to_list()
 

@@ -1538,9 +1538,39 @@ def value_counts(
         )
         return Series(block)
 
+    @typing.overload  # type: ignore[override]
     def sort_values(
-        self, *, axis=0, ascending=True, kind: str = "quicksort", na_position="last"
+        self,
+        *,
+        axis=...,
+        inplace: Literal[True] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
+    ) -> None:
+        ...
+
+    @typing.overload
+    def sort_values(
+        self,
+        *,
+        axis=...,
+        inplace: Literal[False] = ...,
+        ascending: bool | typing.Sequence[bool] = ...,
+        kind: str = ...,
+        na_position: typing.Literal["first", "last"] = ...,
     ) -> Series:
+        ...
+
+    def sort_values(
+        self,
+        *,
+        axis=0,
+        inplace: bool = False,
+        ascending=True,
+        kind: str = "quicksort",
+        na_position: typing.Literal["first", "last"] = "last",
+    ) -> Optional[Series]:
         if axis != 0 and axis != "index":
             raise ValueError(f"No axis named {axis} for object type Series")
         if na_position not in ["first", "last"]:
@@ -1552,10 +1582,28 @@ def sort_values(
                 else order.descending_over(self._value_column, (na_position == "last"))
             ],
         )
-        return Series(block)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return Series(block)
+
+    @typing.overload  # type: ignore[override]
+    def sort_index(
+        self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
+    ) -> Series:
+        ...
+
+    @typing.overload
+    def sort_index(
+        self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
+    ) -> None:
+        ...
 
     @validations.requires_index
-    def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
+    def sort_index(
+        self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
+    ) -> Optional[Series]:
         # TODO(tbergeron): Support level parameter once multi-index introduced.
         if axis != 0 and axis != "index":
             raise ValueError(f"No axis named {axis} for object type Series")
@@ -1570,7 +1618,11 @@ def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
             for column in block.index_columns
         ]
         block = block.order_by(ordering)
-        return Series(block)
+        if inplace:
+            self._set_block(block)
+            return None
+        else:
+            return Series(block)
 
     @validations.requires_ordering()
     def rolling(

@@ -2035,6 +2035,17 @@ def test_sort_index(scalars_dfs, ascending, na_position):
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
+def test_dataframe_sort_index_inplace(scalars_dfs):
+    index_column = "int64_col"
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df = scalars_df.copy().set_index(index_column)
+    df.sort_index(ascending=False, inplace=True)
+    bf_result = df.to_pandas()
+
+    pd_result = scalars_pandas_df.set_index(index_column).sort_index(ascending=False)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
 def test_df_abs(scalars_dfs_maybe_ordered):
     scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
     columns = ["int64_col", "int64_too", "float64_col"]
@@ -2817,6 +2828,32 @@ def test_dataframe_sort_values(
     )
 
 
+@pytest.mark.parametrize(
+    ("by", "ascending", "na_position"),
+    [
+        ("int64_col", True, "first"),
+        (["bool_col", "int64_col"], True, "last"),
+    ],
+)
+def test_dataframe_sort_values_inplace(
+    scalars_df_index, scalars_pandas_df_index, by, ascending, na_position
+):
+    # Test needs values to be unique
+    bf_sorted = scalars_df_index.copy()
+    bf_sorted.sort_values(
+        by, ascending=ascending, na_position=na_position, inplace=True
+    )
+    bf_result = bf_sorted.to_pandas()
+    pd_result = scalars_pandas_df_index.sort_values(
+        by, ascending=ascending, na_position=na_position
+    )
+
+    pandas.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 def test_dataframe_sort_values_invalid_input(scalars_df_index):
     with pytest.raises(KeyError):
         scalars_df_index.sort_values(by=scalars_df_index["int64_col"])

@@ -3339,6 +3339,19 @@ def test_sort_values(scalars_df_index, scalars_pandas_df_index, ascending, na_po
     )
 
 
+def test_series_sort_values_inplace(scalars_df_index, scalars_pandas_df_index):
+    # Test needs values to be unique
+    bf_series = scalars_df_index["int64_col"].copy()
+    bf_series.sort_values(ascending=False, inplace=True)
+    bf_result = bf_series.to_pandas()
+    pd_result = scalars_pandas_df_index["int64_col"].sort_values(ascending=False)
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 @pytest.mark.parametrize(
     ("ascending"),
     [
@@ -3358,6 +3371,18 @@ def test_sort_index(scalars_df_index, scalars_pandas_df_index, ascending):
     )
 
 
+def test_series_sort_index_inplace(scalars_df_index, scalars_pandas_df_index):
+    bf_series = scalars_df_index["int64_too"].copy()
+    bf_series.sort_index(ascending=False, inplace=True)
+    bf_result = bf_series.to_pandas()
+    pd_result = scalars_pandas_df_index["int64_too"].sort_index(ascending=False)
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 def test_mask_default_value(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 

@@ -96,16 +96,17 @@ def test_read_gbq_tokyo(
     tokyo_location: str,
 ):
     df = session_tokyo.read_gbq(scalars_table_tokyo, index_col=["rowindex"])
-    result = df.sort_index().to_pandas()
+    df.sort_index(inplace=True)
     expected = scalars_pandas_df_index
 
     # use_explicit_destination=True, otherwise might use path with no query_job
-    result = session_tokyo._executor.execute(
+    exec_result = session_tokyo._executor.execute(
         df._block.expr, use_explicit_destination=True
     )
-    assert result.query_job.location == tokyo_location
+    assert exec_result.query_job is not None
+    assert exec_result.query_job.location == tokyo_location
 
-    assert len(expected) == result.total_rows
+    assert len(expected) == exec_result.total_rows
 
 
 @pytest.mark.parametrize(

@@ -2213,10 +2213,11 @@ def sort_values(
         self,
         by: str | Sequence[str],
         *,
+        inplace: bool = False,
         ascending: bool | Sequence[bool] = True,
         kind: str = "quicksort",
-        na_position="last",
-    ) -> DataFrame:
+        na_position: Literal["first", "last"] = "last",
+    ):
         """Sort by the values along row axis.
 
         **Examples:**
@@ -2300,6 +2301,8 @@ def sort_values(
                 Sort ascending vs. descending. Specify list for multiple sort
                 orders.  If this is a list of bools, must match the length of
                 the by.
+            inplace (bool, default False):
+                If True, perform operation in-place.
             kind (str, default 'quicksort'):
                 Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
                 'heapsort', 'stable'. Ignored except when determining whether to
@@ -2309,8 +2312,8 @@ def sort_values(
              if `first`; `last` puts NaNs at the end.
 
         Returns:
-            bigframes.pandas.DataFrame:
-                DataFrame with sorted values.
+            bigframes.pandas.DataFram or None:
+                DataFrame with sorted values or None if inplace=True.
 
         Raises:
             ValueError:
@@ -2320,12 +2323,25 @@ def sort_values(
 
     def sort_index(
         self,
-    ) -> DataFrame:
+        *,
+        ascending: bool = True,
+        inplace: bool = False,
+        na_position: Literal["first", "last"] = "last",
+    ):
         """Sort object by labels (along an axis).
 
+        Args:
+            ascending (bool, default True)
+                Sort ascending vs. descending.
+            inplace (bool, default False):
+                Whether to modify the DataFrame rather than creating a new one.
+            na_position ({'first', 'last'}, default 'last'):
+                Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
+                Not implemented for MultiIndex.
+
         Returns:
             bigframes.pandas.DataFrame:
-                The original DataFrame sorted by the labels.
+                DataFrame with sorted values or None if inplace=True.
 
         Raises:
             ValueError: