feat: Add barh, pie plot types (#2146)

TrevorBergeron · web-flow · commit 5cc3c5b1391a · 2025-10-09T13:33:19.000-07:00
diff --git a/bigframes/operations/_matplotlib/__init__.py b/bigframes/operations/_matplotlib/__init__.py
@@ -22,6 +22,8 @@
 PLOT_CLASSES: dict[str, PLOT_TYPES] = {
     "area": core.AreaPlot,
     "bar": core.BarPlot,
+    "barh": core.BarhPlot,
+    "pie": core.PiePlot,
     "line": core.LinePlot,
     "scatter": core.ScatterPlot,
     "hist": hist.HistPlot,
diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py
@@ -55,7 +55,12 @@ def _kind(self):
 
     @property
     def _sampling_warning_msg(self) -> typing.Optional[str]:
-        return None
+        return (
+            "To optimize plotting performance, your data has been downsampled to {sampling_n} "
+            "rows from the original {total_n} rows. This may result in some data points "
+            "not being displayed. For a more comprehensive view, consider pre-processing "
+            "your data by aggregating it or selecting the top categories."
+        )
 
     def __init__(self, data, **kwargs) -> None:
         self.kwargs = kwargs
@@ -92,6 +97,10 @@ def _compute_plot_data(self):
 
 
 class AreaPlot(SamplingPlot):
+    @property
+    def _sampling_warning_msg(self) -> typing.Optional[str]:
+        return None
+
     @property
     def _kind(self) -> typing.Literal["area"]:
         return "area"
@@ -102,14 +111,17 @@ class BarPlot(SamplingPlot):
     def _kind(self) -> typing.Literal["bar"]:
         return "bar"
 
+
+class BarhPlot(SamplingPlot):
     @property
-    def _sampling_warning_msg(self) -> typing.Optional[str]:
-        return (
-            "To optimize plotting performance, your data has been downsampled to {sampling_n} "
-            "rows from the original {total_n} rows. This may result in some data points "
-            "not being displayed. For a more comprehensive view, consider pre-processing "
-            "your data by aggregating it or selecting the top categories."
-        )
+    def _kind(self) -> typing.Literal["barh"]:
+        return "barh"
+
+
+class PiePlot(SamplingPlot):
+    @property
+    def _kind(self) -> typing.Literal["pie"]:
+        return "pie"
 
 
 class LinePlot(SamplingPlot):
@@ -123,6 +135,10 @@ class ScatterPlot(SamplingPlot):
     def _kind(self) -> typing.Literal["scatter"]:
         return "scatter"
 
+    @property
+    def _sampling_warning_msg(self) -> typing.Optional[str]:
+        return None
+
     def __init__(self, data, **kwargs) -> None:
         super().__init__(data, **kwargs)
 
diff --git a/bigframes/operations/plotting.py b/bigframes/operations/plotting.py
@@ -25,8 +25,8 @@
 class PlotAccessor(vendordt.PlotAccessor):
     __doc__ = vendordt.PlotAccessor.__doc__
 
-    _common_kinds = ("line", "area", "hist", "bar")
-    _dataframe_kinds = ("scatter",)
+    _common_kinds = ("line", "area", "hist", "bar", "barh", "pie")
+    _dataframe_kinds = ("scatter",)  # hexbin has no plot class in this change
     _all_kinds = _common_kinds + _dataframe_kinds
 
     def __call__(self, **kwargs):
@@ -82,6 +82,21 @@ def bar(
     ):
         return self(kind="bar", x=x, y=y, **kwargs)
 
+    def barh(
+        self,
+        x: typing.Optional[typing.Hashable] = None,
+        y: typing.Optional[typing.Hashable] = None,
+        **kwargs,
+    ):
+        return self(kind="barh", x=x, y=y, **kwargs)
+
+    def pie(
+        self,
+        y: typing.Optional[typing.Hashable] = None,
+        **kwargs,
+    ):
+        return self(kind="pie", y=y, **kwargs)
+
     def scatter(
         self,
         x: typing.Optional[typing.Hashable] = None,
diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py
@@ -264,6 +264,42 @@ def test_bar(scalars_dfs, col_names, alias):
         tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
 
 
+@pytest.mark.parametrize(
+    ("col_names",),
+    [
+        pytest.param(["int64_col", "float64_col", "int64_too"], id="df"),
+        pytest.param(["int64_col"], id="series"),
+    ],
+)
+def test_barh(scalars_dfs, col_names):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    ax = scalars_df[col_names].plot.barh()
+    pd_ax = scalars_pandas_df[col_names].plot.barh()
+    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
+    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
+    assert len(ax.patches) == len(pd_ax.patches)  # bars are Rectangle patches
+    for bar, pd_bar in zip(ax.patches, pd_ax.patches):
+        tm.assert_almost_equal(bar.get_width(), pd_bar.get_width())
+
+
+@pytest.mark.parametrize(
+    ("col_names",),
+    [
+        pytest.param(["int64_col", "float64_col", "int64_too"], id="df"),
+        pytest.param(["int64_col"], id="series"),
+    ],
+)
+def test_pie(scalars_dfs, col_names):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    ax = scalars_df[col_names].abs().plot.pie(y="int64_col")
+    pd_ax = scalars_pandas_df[col_names].abs().plot.pie(y="int64_col")
+    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
+    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
+    assert len(ax.patches) == len(pd_ax.patches)  # slices are Wedge patches
+    for wedge, pd_wedge in zip(ax.patches, pd_ax.patches):
+        tm.assert_almost_equal(wedge.theta2, pd_wedge.theta2)
+
+
 @pytest.mark.parametrize(
     ("col_names", "alias"),
     [
diff --git a/third_party/bigframes_vendored/pandas/plotting/_core.py b/third_party/bigframes_vendored/pandas/plotting/_core.py
@@ -275,6 +275,109 @@ def bar(
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def barh(
+        self,
+        x: typing.Optional[typing.Hashable] = None,
+        y: typing.Optional[typing.Hashable] = None,
+        **kwargs,
+    ):
+        """
+        Draw a horizontal bar plot.
+
+        This function calls `pandas.plot` to generate a plot with a random sample
+        of items. For consistent results, the random sampling is reproducible.
+        Use the `sampling_random_state` parameter to modify the sampling seed.
+
+        **Examples:**
+
+        Basic plot.
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
+            >>> ax = df.plot.barh(x='lab', y='val', rot=0)
+
+        Plot a whole dataframe to a barh plot. Each column is assigned a distinct color,
+        and each row is nested in a group along the vertical axis.
+
+            >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
+            >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
+            >>> index = ['snail', 'pig', 'elephant',
+            ...          'rabbit', 'giraffe', 'coyote', 'horse']
+            >>> df = bpd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index)
+            >>> ax = df.plot.barh(rot=0)
+
+        Plot stacked barh charts for the DataFrame.
+
+            >>> ax = df.plot.barh(stacked=True)
+
+        If you don’t like the default colours, you can specify how you’d like each column
+        to be colored.
+
+            >>> axes = df.plot.barh(
+            ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
+            ... )
+
+        Args:
+            x (label or position, optional):
+                Allows plotting of one column versus another. If not specified, the index
+                of the DataFrame is used.
+            y (label or position, optional):
+                Allows plotting of one column versus another. If not specified, all numerical
+                columns are used.
+            **kwargs:
+                Additional keyword arguments are documented in
+                :meth:`DataFrame.plot`.
+
+        Returns:
+            matplotlib.axes.Axes or numpy.ndarray:
+                Bar plot, or array of bar plots if subplots is True.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def pie(
+        self,
+        y: typing.Optional[typing.Hashable] = None,
+        **kwargs,
+    ):
+        """
+        Generate a pie plot.
+
+        A pie plot is a proportional representation of the numerical data in a
+        column. This function wraps :meth:`matplotlib.pyplot.pie` for the
+        specified column. If no column reference is passed and
+        ``subplots=True`` a pie plot is drawn for each numerical column
+        independently.
+
+        **Examples:**
+
+        In the example below we have a DataFrame with the information about
+        planet's mass and radius. We pass the 'mass' column to the
+        pie function to get a pie plot.
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97],
+            ...                    'radius': [2439.7, 6051.8, 6378.1]},
+            ...                   index=['Mercury', 'Venus', 'Earth'])
+            >>> plot = df.plot.pie(y='mass', figsize=(5, 5))
+
+            >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
+
+        Args:
+            y (int or label, optional):
+                Label or position of the column to plot.
+                If not provided, ``subplots=True`` argument must be passed.
+            **kwargs:
+                Keyword arguments to pass on to :meth:`DataFrame.plot`.
+
+        Returns:
+            matplotlib.axes.Axes or numpy.ndarray:
+                A NumPy array is returned when `subplots` is True.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def scatter(
         self,
         x: typing.Optional[typing.Hashable] = None,