Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5279,6 +5279,25 @@ def bar(

bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar)

def barh(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Convenience alias for ``self.plot.barh``: every argument is handed to
    # the plot accessor unchanged and whatever it returns is returned as-is.
    # No docstring is written here because ``barh.__doc__`` is assigned
    # immediately after this definition.
    accessor = self.plot
    return accessor.barh(x=x, y=y, **kwargs)

# Copy the documentation from PlotAccessor.barh so this alias is documented
# exactly like the `.plot.barh` accessor method it forwards to.
barh.__doc__ = inspect.getdoc(plotting.PlotAccessor.barh)

def pie(
    self,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Convenience alias for ``self.plot.pie``: ``y`` and any extra keyword
    # arguments go to the plot accessor unchanged, and its result is returned.
    # No docstring is written here because ``pie.__doc__`` is assigned
    # immediately after this definition.
    accessor = self.plot
    return accessor.pie(y=y, **kwargs)

# Copy the documentation from PlotAccessor.pie so this alias is documented
# exactly like the `.plot.pie` accessor method it forwards to.
pie.__doc__ = inspect.getdoc(plotting.PlotAccessor.pie)

def scatter(
self,
x: typing.Optional[typing.Hashable] = None,
Expand Down
2 changes: 2 additions & 0 deletions bigframes/operations/_matplotlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
PLOT_CLASSES: dict[str, PLOT_TYPES] = {
"area": core.AreaPlot,
"bar": core.BarPlot,
"barh": core.BarhPlot,
"pie": core.PiePlot,
"line": core.LinePlot,
"scatter": core.ScatterPlot,
"hist": hist.HistPlot,
Expand Down
30 changes: 30 additions & 0 deletions bigframes/operations/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,36 @@ def _sampling_warning_msg(self) -> typing.Optional[str]:
)


class BarhPlot(SamplingPlot):
@property
def _kind(self) -> typing.Literal["barh"]:
return "barh"

@property
def _sampling_warning_msg(self) -> typing.Optional[str]:
return (
"To optimize plotting performance, your data has been downsampled to {sampling_n} "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please move this warning message to the SamplingPlot base class? Then BarhPlot/PiePlot/BarPlot would only need to set a flag to "True" to decide whether to output this warning message.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"rows from the original {total_n} rows. This may result in some data points "
"not being displayed. For a more comprehensive view, consider pre-processing "
"your data by aggregating it or selecting the top categories."
)


class PiePlot(SamplingPlot):
    """Sampling-based plot whose kind is ``"pie"``."""

    @property
    def _kind(self) -> typing.Literal["pie"]:
        """Return the plot kind identifier for this class."""
        kind: typing.Literal["pie"] = "pie"
        return kind

    @property
    def _sampling_warning_msg(self) -> typing.Optional[str]:
        """Return the downsampling warning template.

        The ``{sampling_n}`` and ``{total_n}`` placeholders are returned
        unformatted; filling them in is left to the caller.
        """
        template = (
            "To optimize plotting performance, your data has been downsampled to {sampling_n} "
            "rows from the original {total_n} rows. This may result in some data points "
            "not being displayed. For a more comprehensive view, consider pre-processing "
            "your data by aggregating it or selecting the top categories."
        )
        return template


class LinePlot(SamplingPlot):
@property
def _kind(self) -> typing.Literal["line"]:
Expand Down
19 changes: 17 additions & 2 deletions bigframes/operations/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
class PlotAccessor(vendordt.PlotAccessor):
__doc__ = vendordt.PlotAccessor.__doc__

_common_kinds = ("line", "area", "hist", "bar")
_dataframe_kinds = ("scatter",)
# Kinds in the "common" group (named after pandas' PlotAccessor grouping).
_common_kinds = ("line", "area", "hist", "bar", "barh", "pie")
# Kinds in the DataFrame-only group. The entry used to read "hexbin," with a
# stray comma inside the string literal, which registered the kind under the
# name "hexbin," instead of "hexbin".
_dataframe_kinds = ("scatter", "hexbin")
# Every kind name: the common group followed by the DataFrame-only group.
_all_kinds = _common_kinds + _dataframe_kinds

def __call__(self, **kwargs):
Expand Down Expand Up @@ -82,6 +82,21 @@ def bar(
):
return self(kind="bar", x=x, y=y, **kwargs)

def barh(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Horizontal-bar shortcut: call the accessor itself with kind="barh",
    # passing x, y and any extra keyword arguments through untouched.
    # Intentionally left without a docstring of its own so that docstring
    # lookup via inspect.getdoc is not shadowed by a local one.
    plot_result = self(kind="barh", x=x, y=y, **kwargs)
    return plot_result

def pie(
    self,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Pie shortcut: call the accessor itself with kind="pie", passing y and
    # any extra keyword arguments through untouched.
    # Intentionally left without a docstring of its own so that docstring
    # lookup via inspect.getdoc is not shadowed by a local one.
    plot_result = self(kind="pie", y=y, **kwargs)
    return plot_result

def scatter(
self,
x: typing.Optional[typing.Hashable] = None,
Expand Down
19 changes: 19 additions & 0 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2530,6 +2530,25 @@ def bar(

bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar)

def barh(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Convenience alias for ``self.plot.barh``: every argument is handed to
    # the plot accessor unchanged and whatever it returns is returned as-is.
    # No docstring is written here because ``barh.__doc__`` is assigned
    # immediately after this definition.
    accessor = self.plot
    return accessor.barh(x=x, y=y, **kwargs)

# Copy the documentation from PlotAccessor.barh so this alias is documented
# exactly like the `.plot.barh` accessor method it forwards to.
barh.__doc__ = inspect.getdoc(plotting.PlotAccessor.barh)

def pie(
    self,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Convenience alias for ``self.plot.pie``: ``y`` and any extra keyword
    # arguments go to the plot accessor unchanged, and its result is returned.
    # No docstring is written here because ``pie.__doc__`` is assigned
    # immediately after this definition.
    accessor = self.plot
    return accessor.pie(y=y, **kwargs)

# Copy the documentation from PlotAccessor.pie so this alias is documented
# exactly like the `.plot.pie` accessor method it forwards to.
pie.__doc__ = inspect.getdoc(plotting.PlotAccessor.pie)

def _slice(
self,
start: typing.Optional[int] = None,
Expand Down
46 changes: 46 additions & 0 deletions tests/system/small/operations/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,52 @@ def test_bar(scalars_dfs, col_names, alias):
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])


@pytest.mark.parametrize(
    ("col_names", "alias"),
    [
        pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"),
        pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"),
        pytest.param(["int64_col"], True, id="series_alias"),
        pytest.param(["int64_col"], False, id="series"),
    ],
)
def test_barh(scalars_dfs, col_names, alias):
    """Check that a bigframes horizontal bar plot matches the pandas one.

    ``alias`` selects between the direct ``.barh()`` shortcut and the
    ``.plot.barh()`` accessor path; both are compared against pandas'
    ``.plot.barh()`` on the same columns.
    """
    bf_df, pd_df = scalars_dfs
    # NOTE(review): ``col_names`` is always a list, so the "series*" cases
    # presumably still select a one-column DataFrame - confirm intent.
    ax = bf_df[col_names].barh() if alias else bf_df[col_names].plot.barh()
    pd_ax = pd_df[col_names].plot.barh()

    # Tick positions on both axes must agree.
    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())

    # NOTE(review): bar charts normally draw rectangles (``ax.patches``), so
    # ``ax.lines`` is likely empty and this loop may compare nothing - verify.
    for bf_line, pd_line in zip(ax.lines, pd_ax.lines):
        # Compare y coordinates between the lines
        _, bf_y = bf_line.get_data()
        _, pd_y = pd_line.get_data()
        tm.assert_almost_equal(bf_y, pd_y)


@pytest.mark.parametrize(
    ("col_names", "alias"),
    [
        pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"),
        pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"),
        pytest.param(["int64_col"], True, id="series_alias"),
        pytest.param(["int64_col"], False, id="series"),
    ],
)
def test_pie(scalars_dfs, col_names, alias):
    """Check that a bigframes pie plot matches the pandas one.

    ``alias`` selects between the direct ``.pie()`` shortcut and the
    ``.plot.pie()`` accessor path; both are compared against pandas'
    ``.plot.pie()`` on the same columns.
    """
    bf_df, pd_df = scalars_dfs
    # Absolute values are plotted on both sides (presumably because pie
    # charts cannot represent negative values - confirm).
    # NOTE(review): ``col_names`` is always a list, so the "series*" cases
    # presumably still select a one-column DataFrame - confirm intent.
    if alias:
        ax = bf_df[col_names].abs().pie(y="int64_col")
    else:
        ax = bf_df[col_names].abs().plot.pie(y="int64_col")
    pd_ax = pd_df[col_names].abs().plot.pie(y="int64_col")

    # Tick positions on both axes must agree.
    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())

    # NOTE(review): pie charts normally draw wedges (``ax.patches``), so
    # ``ax.lines`` is likely empty and this loop may compare nothing - verify.
    for bf_line, pd_line in zip(ax.lines, pd_ax.lines):
        # Compare y coordinates between the lines
        _, bf_y = bf_line.get_data()
        _, pd_y = pd_line.get_data()
        tm.assert_almost_equal(bf_y, pd_y)


@pytest.mark.parametrize(
("col_names", "alias"),
[
Expand Down
103 changes: 103 additions & 0 deletions third_party/bigframes_vendored/pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,109 @@ def bar(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def barh(
self,
x: typing.Optional[typing.Hashable] = None,
y: typing.Optional[typing.Hashable] = None,
**kwargs,
):
"""
Draw a horizontal bar plot.

This function calls `pandas.plot` to generate a plot with a random sample
of items. For consistent results, the random sampling is reproducible.
Use the `sampling_random_state` parameter to modify the sampling seed.

**Examples:**

Basic plot.

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
>>> ax = df.plot.barh(x='lab', y='val', rot=0)

Plot a whole dataframe to a barh plot. Each column is assigned a distinct color,
and each row is nested in a group along the vertical axis.

>>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
>>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
>>> index = ['snail', 'pig', 'elephant',
... 'rabbit', 'giraffe', 'coyote', 'horse']
>>> df = bpd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index)
>>> ax = df.plot.barh(rot=0)

Plot stacked barh charts for the DataFrame.

>>> ax = df.plot.barh(stacked=True)

If you don’t like the default colours, you can specify how you’d like each column
to be colored.

>>> axes = df.plot.barh(
... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
... )

Args:
x (label or position, optional):
Allows plotting of one column versus another. If not specified, the index
of the DataFrame is used.
y (label or position, optional):
Allows plotting of one column versus another. If not specified, all numerical
columns are used.
**kwargs:
Additional keyword arguments are documented in
:meth:`DataFrame.plot`.

Returns:
matplotlib.axes.Axes or numpy.ndarray:
Horizontal bar plot, or array of horizontal bar plots if subplots is True.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def pie(
self,
y: typing.Optional[typing.Hashable] = None,
**kwargs,
):
"""
Generate a pie plot.

A pie plot is a proportional representation of the numerical data in a
column. This function wraps :meth:`matplotlib.pyplot.pie` for the
specified column. If no column reference is passed and
``subplots=True`` a pie plot is drawn for each numerical column
independently.

This function calls `pandas.plot` to generate a plot with a random sample
of items. For consistent results, the random sampling is reproducible.
Use the `sampling_random_state` parameter to modify the sampling seed.

**Examples:**

In the example below we have a DataFrame with information about the
planets' mass and radius. We pass the 'mass' column to the
pie function to get a pie plot.

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

>>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97],
... 'radius': [2439.7, 6051.8, 6378.1]},
... index=['Mercury', 'Venus', 'Earth'])
>>> plot = df.plot.pie(y='mass', figsize=(5, 5))

>>> plot = df.plot.pie(subplots=True, figsize=(11, 6))

Args:
y (int or label, optional):
Label or position of the column to plot.
If not provided, ``subplots=True`` argument must be passed.
**kwargs:
Keyword arguments to pass on to :meth:`DataFrame.plot`.

Returns:
matplotlib.axes.Axes or np.ndarray:
A NumPy array is returned when `subplots` is True.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def scatter(
self,
x: typing.Optional[typing.Hashable] = None,
Expand Down