From 414e2b4156b962b4b926d7f87137621a22b3bf64 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 9 Sep 2025 22:38:39 +0000 Subject: [PATCH] feat: support pd.cut() for array-like type --- bigframes/core/reshape/tile.py | 8 ++++++-- tests/system/small/test_pandas.py | 12 ++++++++++++ tests/unit/test_pandas.py | 3 +++ .../bigframes_vendored/pandas/core/reshape/tile.py | 6 +++--- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/bigframes/core/reshape/tile.py b/bigframes/core/reshape/tile.py index 86ccf52408..74a941be54 100644 --- a/bigframes/core/reshape/tile.py +++ b/bigframes/core/reshape/tile.py @@ -20,6 +20,7 @@ import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile import pandas as pd +import bigframes import bigframes.constants import bigframes.core.expression as ex import bigframes.core.ordering as order @@ -32,7 +33,7 @@ def cut( - x: bigframes.series.Series, + x, bins: typing.Union[ int, pd.IntervalIndex, @@ -60,9 +61,12 @@ def cut( f"but found {type(list(labels)[0])}. {constants.FEEDBACK_LINK}" ) - if x.size == 0: + if len(x) == 0: raise ValueError("Cannot cut empty array.") + if not isinstance(x, bigframes.series.Series): + x = bigframes.series.Series(x) + if isinstance(bins, int): if bins <= 0: raise ValueError("`bins` should be a positive integer.") diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 550a75e1bb..d2cde59729 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -520,6 +520,18 @@ def _convert_pandas_category(pd_s: pd.Series): ) +def test_cut_for_array(): + """Avoid regressions for internal issue 329866195""" + sc = [30, 80, 40, 90, 60, 45, 95, 75, 55, 100, 65, 85] + x = [20, 40, 60, 80, 100] + + pd_result: pd.Series = pd.Series(pd.cut(sc, x)) + bf_result = bpd.cut(sc, x) + + pd_result = _convert_pandas_category(pd_result) + pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + + @pytest.mark.parametrize( ("right", "labels"), [ diff --git a/tests/unit/test_pandas.py b/tests/unit/test_pandas.py index e8383512a6..73e0b7f2d6 100644 --- a/tests/unit/test_pandas.py +++ b/tests/unit/test_pandas.py @@ -122,6 +122,7 @@ def test_method_matches_session(method_name: str): ) def test_cut_raises_with_invalid_labels(bins: int, labels, error_message: str): mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) + mock_series.__len__.return_value = 5 with pytest.raises(ValueError, match=error_message): bigframes.pandas.cut(mock_series, bins, labels=labels) @@ -160,6 +161,8 @@ def test_cut_raises_with_unsupported_labels(): ) def test_cut_raises_with_invalid_bins(bins: int, error_message: str): mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True) + mock_series.__len__.return_value = 5 + with pytest.raises(ValueError, match=error_message): bigframes.pandas.cut(mock_series, bins, labels=False) diff --git a/third_party/bigframes_vendored/pandas/core/reshape/tile.py b/third_party/bigframes_vendored/pandas/core/reshape/tile.py index fccaffdadf..697c17f23c 100644 --- a/third_party/bigframes_vendored/pandas/core/reshape/tile.py +++ b/third_party/bigframes_vendored/pandas/core/reshape/tile.py @@ -8,11 +8,11 @@ import pandas as pd -from bigframes import constants, series +from bigframes import constants def cut( - x: series.Series, + x, bins: typing.Union[ int, pd.IntervalIndex, @@ -113,7 +113,7 @@ def cut( dtype: struct[pyarrow] Args: - x (bigframes.pandas.Series): + x (array-like): The input Series to be binned. Must be 1-dimensional. bins (int, pd.IntervalIndex, Iterable): The criteria to bin by.