Skip to content

Commit 5cc3c5b

Browse files
feat: Add barh, pie plot types (#2146)
1 parent 5e1e809 commit 5cc3c5b

File tree

5 files changed

+182
-10
lines changed

5 files changed

+182
-10
lines changed

bigframes/operations/_matplotlib/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
PLOT_CLASSES: dict[str, PLOT_TYPES] = {
2323
"area": core.AreaPlot,
2424
"bar": core.BarPlot,
25+
"barh": core.BarhPlot,
26+
"pie": core.PiePlot,
2527
"line": core.LinePlot,
2628
"scatter": core.ScatterPlot,
2729
"hist": hist.HistPlot,

bigframes/operations/_matplotlib/core.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,12 @@ def _kind(self):
5555

5656
@property
5757
def _sampling_warning_msg(self) -> typing.Optional[str]:
58-
return None
58+
return (
59+
"To optimize plotting performance, your data has been downsampled to {sampling_n} "
60+
"rows from the original {total_n} rows. This may result in some data points "
61+
"not being displayed. For a more comprehensive view, consider pre-processing "
62+
"your data by aggregating it or selecting the top categories."
63+
)
5964

6065
def __init__(self, data, **kwargs) -> None:
6166
self.kwargs = kwargs
@@ -92,6 +97,10 @@ def _compute_plot_data(self):
9297

9398

9499
class AreaPlot(SamplingPlot):
100+
@property
101+
def _sampling_warning_msg(self) -> typing.Optional[str]:
102+
return None
103+
95104
@property
96105
def _kind(self) -> typing.Literal["area"]:
97106
return "area"
@@ -102,14 +111,17 @@ class BarPlot(SamplingPlot):
102111
def _kind(self) -> typing.Literal["bar"]:
103112
return "bar"
104113

114+
115+
class BarhPlot(SamplingPlot):
105116
@property
106-
def _sampling_warning_msg(self) -> typing.Optional[str]:
107-
return (
108-
"To optimize plotting performance, your data has been downsampled to {sampling_n} "
109-
"rows from the original {total_n} rows. This may result in some data points "
110-
"not being displayed. For a more comprehensive view, consider pre-processing "
111-
"your data by aggregating it or selecting the top categories."
112-
)
117+
def _kind(self) -> typing.Literal["barh"]:
118+
return "barh"
119+
120+
121+
class PiePlot(SamplingPlot):
122+
@property
123+
def _kind(self) -> typing.Literal["pie"]:
124+
return "pie"
113125

114126

115127
class LinePlot(SamplingPlot):
@@ -123,6 +135,10 @@ class ScatterPlot(SamplingPlot):
123135
def _kind(self) -> typing.Literal["scatter"]:
124136
return "scatter"
125137

138+
@property
139+
def _sampling_warning_msg(self) -> typing.Optional[str]:
140+
return None
141+
126142
def __init__(self, data, **kwargs) -> None:
127143
super().__init__(data, **kwargs)
128144

bigframes/operations/plotting.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
class PlotAccessor(vendordt.PlotAccessor):
2626
__doc__ = vendordt.PlotAccessor.__doc__
2727

28-
_common_kinds = ("line", "area", "hist", "bar")
29-
_dataframe_kinds = ("scatter",)
28+
_common_kinds = ("line", "area", "hist", "bar", "barh", "pie")
29+
_dataframe_kinds = ("scatter", "hexbin,")
3030
_all_kinds = _common_kinds + _dataframe_kinds
3131

3232
def __call__(self, **kwargs):
@@ -82,6 +82,21 @@ def bar(
8282
):
8383
return self(kind="bar", x=x, y=y, **kwargs)
8484

85+
def barh(
86+
self,
87+
x: typing.Optional[typing.Hashable] = None,
88+
y: typing.Optional[typing.Hashable] = None,
89+
**kwargs,
90+
):
91+
return self(kind="barh", x=x, y=y, **kwargs)
92+
93+
def pie(
94+
self,
95+
y: typing.Optional[typing.Hashable] = None,
96+
**kwargs,
97+
):
98+
return self(kind="pie", y=y, **kwargs)
99+
85100
def scatter(
86101
self,
87102
x: typing.Optional[typing.Hashable] = None,

tests/system/small/operations/test_plotting.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,42 @@ def test_bar(scalars_dfs, col_names, alias):
264264
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
265265

266266

267+
@pytest.mark.parametrize(
268+
("col_names",),
269+
[
270+
pytest.param(["int64_col", "float64_col", "int64_too"], id="df"),
271+
pytest.param(["int64_col"], id="series"),
272+
],
273+
)
274+
def test_barh(scalars_dfs, col_names):
275+
scalars_df, scalars_pandas_df = scalars_dfs
276+
ax = scalars_df[col_names].plot.barh()
277+
pd_ax = scalars_pandas_df[col_names].plot.barh()
278+
tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
279+
tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
280+
for line, pd_line in zip(ax.lines, pd_ax.lines):
281+
# Compare y coordinates between the lines
282+
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
283+
284+
285+
@pytest.mark.parametrize(
286+
("col_names",),
287+
[
288+
pytest.param(["int64_col", "float64_col", "int64_too"], id="df"),
289+
pytest.param(["int64_col"], id="series"),
290+
],
291+
)
292+
def test_pie(scalars_dfs, col_names):
293+
scalars_df, scalars_pandas_df = scalars_dfs
294+
ax = scalars_df[col_names].abs().plot.pie(y="int64_col")
295+
pd_ax = scalars_pandas_df[col_names].abs().plot.pie(y="int64_col")
296+
tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
297+
tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
298+
for line, pd_line in zip(ax.lines, pd_ax.lines):
299+
# Compare y coordinates between the lines
300+
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
301+
302+
267303
@pytest.mark.parametrize(
268304
("col_names", "alias"),
269305
[

third_party/bigframes_vendored/pandas/plotting/_core.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,109 @@ def bar(
275275
"""
276276
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
277277

278+
def barh(
279+
self,
280+
x: typing.Optional[typing.Hashable] = None,
281+
y: typing.Optional[typing.Hashable] = None,
282+
**kwargs,
283+
):
284+
"""
285+
Draw a horizontal bar plot.
286+
287+
This function calls `pandas.plot` to generate a plot with a random sample
288+
of items. For consistent results, the random sampling is reproducible.
289+
Use the `sampling_random_state` parameter to modify the sampling seed.
290+
291+
**Examples:**
292+
293+
Basic plot.
294+
295+
>>> import bigframes.pandas as bpd
296+
>>> bpd.options.display.progress_bar = None
297+
>>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
298+
>>> ax = df.plot.barh(x='lab', y='val', rot=0)
299+
300+
Plot a whole dataframe to a barh plot. Each column is assigned a distinct color,
301+
and each row is nested in a group along the vertical axis.
302+
303+
>>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
304+
>>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
305+
>>> index = ['snail', 'pig', 'elephant',
306+
... 'rabbit', 'giraffe', 'coyote', 'horse']
307+
>>> df = bpd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index)
308+
>>> ax = df.plot.barh(rot=0)
309+
310+
Plot stacked barh charts for the DataFrame.
311+
312+
>>> ax = df.plot.barh(stacked=True)
313+
314+
If you don’t like the default colours, you can specify how you’d like each column
315+
to be colored.
316+
317+
>>> axes = df.plot.barh(
318+
... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
319+
... )
320+
321+
Args:
322+
x (label or position, optional):
323+
Allows plotting of one column versus another. If not specified, the index
324+
of the DataFrame is used.
325+
y (label or position, optional):
326+
Allows plotting of one column versus another. If not specified, all numerical
327+
columns are used.
328+
**kwargs:
329+
Additional keyword arguments are documented in
330+
:meth:`DataFrame.plot`.
331+
332+
Returns:
333+
matplotlib.axes.Axes or numpy.ndarray:
334+
Horizontal bar plot, or array of horizontal bar plots if subplots is True.
335+
"""
336+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
337+
338+
def pie(
339+
self,
340+
y: typing.Optional[typing.Hashable] = None,
341+
**kwargs,
342+
):
343+
"""
344+
Generate a pie plot.
345+
346+
A pie plot is a proportional representation of the numerical data in a
347+
column. This function wraps :meth:`matplotlib.pyplot.pie` for the
348+
specified column. If no column reference is passed and
349+
``subplots=True`` a pie plot is drawn for each numerical column
350+
independently.
351+
352+
**Examples:**
353+
354+
In the example below we have a DataFrame with the information about
355+
planet's mass and radius. We pass the 'mass' column to the
356+
pie function to get a pie plot.
357+
358+
>>> import bigframes.pandas as bpd
359+
>>> bpd.options.display.progress_bar = None
360+
361+
>>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97],
362+
... 'radius': [2439.7, 6051.8, 6378.1]},
363+
... index=['Mercury', 'Venus', 'Earth'])
364+
>>> plot = df.plot.pie(y='mass', figsize=(5, 5))
365+
366+
>>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
367+
368+
Args:
369+
y (int or label, optional):
370+
Label or position of the column to plot.
371+
If not provided, ``subplots=True`` argument must be passed.
372+
**kwargs:
373+
Keyword arguments to pass on to :meth:`DataFrame.plot`.
374+
375+
Returns:
376+
matplotlib.axes.Axes or np.ndarray:
377+
A NumPy array is returned when `subplots` is True.
378+
"""
379+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
380+
278381
def scatter(
279382
self,
280383
x: typing.Optional[typing.Hashable] = None,

0 commit comments

Comments
 (0)