Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bigframes/operations/_matplotlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
PLOT_CLASSES: dict[str, PLOT_TYPES] = {
"area": core.AreaPlot,
"bar": core.BarPlot,
"barh": core.BarhPlot,
"pie": core.PiePlot,
"line": core.LinePlot,
"scatter": core.ScatterPlot,
"hist": hist.HistPlot,
Expand Down
32 changes: 24 additions & 8 deletions bigframes/operations/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ def _kind(self):

@property
def _sampling_warning_msg(self) -> typing.Optional[str]:
    """Return the warning template used when plot data has been downsampled.

    The template carries two ``str.format`` placeholders, ``{sampling_n}``
    and ``{total_n}``; filling them in is left to the caller.

    Returns:
        The warning template string. Some subclasses override this property
        to return ``None`` instead (presumably to suppress the warning --
        confirm against the code that consumes this value).
    """
    # A leftover ``return None`` used to precede this statement, which made
    # the template unreachable; it has been removed so the message is
    # actually returned.
    return (
        "To optimize plotting performance, your data has been downsampled to {sampling_n} "
        "rows from the original {total_n} rows. This may result in some data points "
        "not being displayed. For a more comprehensive view, consider pre-processing "
        "your data by aggregating it or selecting the top categories."
    )

def __init__(self, data, **kwargs) -> None:
self.kwargs = kwargs
Expand Down Expand Up @@ -92,6 +97,10 @@ def _compute_plot_data(self):


class AreaPlot(SamplingPlot):
    """Sampling-based plot whose kind identifier is ``"area"``."""

    @property
    def _kind(self) -> typing.Literal["area"]:
        # Kind identifier reported by this plot class.
        return "area"

    @property
    def _sampling_warning_msg(self) -> typing.Optional[str]:
        # Area plots provide no sampling warning template.
        return None
Expand All @@ -102,14 +111,17 @@ class BarPlot(SamplingPlot):
def _kind(self) -> typing.Literal["bar"]:
return "bar"


class BarhPlot(SamplingPlot):
    """Sampling-based plot whose kind identifier is ``"barh"``."""

    @property
    def _sampling_warning_msg(self) -> typing.Optional[str]:
        """Return the downsampling warning template.

        The template carries ``{sampling_n}`` and ``{total_n}`` placeholders
        for ``str.format``.
        """
        return (
            "To optimize plotting performance, your data has been downsampled to {sampling_n} "
            "rows from the original {total_n} rows. This may result in some data points "
            "not being displayed. For a more comprehensive view, consider pre-processing "
            "your data by aggregating it or selecting the top categories."
        )

    # ``_kind`` must be a property, as in the sibling plot classes; without
    # the decorator ``self._kind`` would evaluate to a bound method rather
    # than the string "barh".
    @property
    def _kind(self) -> typing.Literal["barh"]:
        """Return the kind identifier for horizontal bar plots."""
        return "barh"


class PiePlot(SamplingPlot):
    """Sampling-based plot whose kind identifier is ``"pie"``."""

    @property
    def _kind(self) -> typing.Literal["pie"]:
        # Kind identifier reported by this plot class.
        return "pie"


class LinePlot(SamplingPlot):
Expand All @@ -123,6 +135,10 @@ class ScatterPlot(SamplingPlot):
def _kind(self) -> typing.Literal["scatter"]:
return "scatter"

@property
def _sampling_warning_msg(self) -> typing.Optional[str]:
return None

def __init__(self, data, **kwargs) -> None:
super().__init__(data, **kwargs)

Expand Down
19 changes: 17 additions & 2 deletions bigframes/operations/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
class PlotAccessor(vendordt.PlotAccessor):
__doc__ = vendordt.PlotAccessor.__doc__

# NOTE(review): the next two assignments are rebound by the two that follow,
# so only the later values take effect -- they look like leftover pre-change
# lines; confirm and remove.
_common_kinds = ("line", "area", "hist", "bar")
_dataframe_kinds = ("scatter",)
# Plot kinds listed as common (the split between "common" and "dataframe"
# kinds is enforced elsewhere -- not visible here).
_common_kinds = ("line", "area", "hist", "bar", "barh", "pie")
# NOTE(review): the second entry is the literal string "hexbin," -- the comma
# is inside the quotes. This is probably a typo for "hexbin"; confirm whether
# hexbin is meant to be accepted before changing it.
_dataframe_kinds = ("scatter", "hexbin,")
# Concatenation of both tuples above.
_all_kinds = _common_kinds + _dataframe_kinds

def __call__(self, **kwargs):
Expand Down Expand Up @@ -82,6 +82,21 @@ def bar(
):
return self(kind="bar", x=x, y=y, **kwargs)

def barh(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Thin alias: delegate to this accessor's __call__ with the kind pinned
    # to "barh"; x, y and any extra keyword arguments pass through untouched.
    plot_result = self(kind="barh", x=x, y=y, **kwargs)
    return plot_result

def pie(
    self,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Thin alias: delegate to this accessor's __call__ with the kind pinned
    # to "pie"; y and any extra keyword arguments pass through untouched.
    plot_result = self(kind="pie", y=y, **kwargs)
    return plot_result

def scatter(
self,
x: typing.Optional[typing.Hashable] = None,
Expand Down
46 changes: 46 additions & 0 deletions tests/system/small/operations/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,52 @@ def test_bar(scalars_dfs, col_names, alias):
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])


@pytest.mark.parametrize(
    ("col_names", "alias"),
    [
        pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"),
        pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"),
        pytest.param(["int64_col"], True, id="series_alias"),
        pytest.param(["int64_col"], False, id="series"),
    ],
)
def test_barh(scalars_dfs, col_names, alias):
    # Checks that a barh plot produced from the first element of the
    # ``scalars_dfs`` fixture matches the pandas plot of the second element,
    # via both the direct ``.barh()`` alias and ``.plot.barh()``.
    bf_df, pandas_df = scalars_dfs
    bf_subset = bf_df[col_names]

    # ``alias`` selects the shortcut method on the object itself; otherwise
    # the call goes through the ``.plot`` accessor.
    bf_plotter = bf_subset if alias else bf_subset.plot
    ax = bf_plotter.barh()
    pd_ax = pandas_df[col_names].plot.barh()

    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())

    # NOTE(review): bar charts are normally drawn as patches, so ``ax.lines``
    # may be empty and this loop may assert nothing -- confirm, and consider
    # comparing ``ax.patches`` instead.
    for bf_line, pd_line in zip(ax.lines, pd_ax.lines):
        bf_y = bf_line.get_data()[1]
        pd_y = pd_line.get_data()[1]
        tm.assert_almost_equal(bf_y, pd_y)


@pytest.mark.parametrize(
    ("col_names", "alias"),
    [
        pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"),
        pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"),
        pytest.param(["int64_col"], True, id="series_alias"),
        pytest.param(["int64_col"], False, id="series"),
    ],
)
def test_pie(scalars_dfs, col_names, alias):
    # Checks that a pie plot of ``int64_col`` produced from the first element
    # of the ``scalars_dfs`` fixture matches the pandas plot of the second
    # element. ``abs()`` is applied to both inputs before plotting.
    bf_df, pandas_df = scalars_dfs
    bf_abs = bf_df[col_names].abs()

    # ``alias`` selects the shortcut method on the object itself; otherwise
    # the call goes through the ``.plot`` accessor.
    bf_plotter = bf_abs if alias else bf_abs.plot
    ax = bf_plotter.pie(y="int64_col")
    pd_ax = pandas_df[col_names].abs().plot.pie(y="int64_col")

    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())

    # NOTE(review): pie charts are normally drawn as wedge patches, so
    # ``ax.lines`` may be empty and this loop may assert nothing -- confirm,
    # and consider comparing ``ax.patches`` instead.
    for bf_line, pd_line in zip(ax.lines, pd_ax.lines):
        bf_y = bf_line.get_data()[1]
        pd_y = pd_line.get_data()[1]
        tm.assert_almost_equal(bf_y, pd_y)


@pytest.mark.parametrize(
("col_names", "alias"),
[
Expand Down
103 changes: 103 additions & 0 deletions third_party/bigframes_vendored/pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,109 @@ def bar(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def barh(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    """
    Draw a horizontal bar plot.

    This function calls `pandas.plot` to generate a plot with a random sample
    of items. For consistent results, the random sampling is reproducible.
    Use the `sampling_random_state` parameter to modify the sampling seed.

    **Examples:**

    Basic plot.

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val', rot=0)

    Plot a whole dataframe to a barh plot. Each column is assigned a distinct color,
    and each row is nested in a group along the vertical axis.

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = bpd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(rot=0)

    Plot stacked barh charts for the DataFrame.

        >>> ax = df.plot.barh(stacked=True)

    If you don't like the default colours, you can specify how you'd like each column
    to be colored.

        >>> axes = df.plot.barh(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )

    Args:
        x (label or position, optional):
            Allows plotting of one column versus another. If not specified, the index
            of the DataFrame is used.
        y (label or position, optional):
            Allows plotting of one column versus another. If not specified, all numerical
            columns are used.
        **kwargs:
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

    Returns:
        matplotlib.axes.Axes or numpy.ndarray:
            Horizontal bar plot, or array of horizontal bar plots if subplots is True.
    """
    # Abstract placeholder: this vendored method always raises; a concrete
    # implementation is expected to be supplied elsewhere.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def pie(
    self,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True``, a pie plot is drawn for each numerical column
    independently.

    **Examples:**

    In the example below we have a DataFrame with information about the
    planets' mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                     'radius': [2439.7, 6051.8, 6378.1]},
        ...                    index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    Omit ``y`` and pass ``subplots=True`` to draw one pie per numerical column.

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))

    Args:
        y (int or label, optional):
            Label or position of the column to plot.
            If not provided, ``subplots=True`` argument must be passed.
        **kwargs:
            Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns:
        matplotlib.axes.Axes or numpy.ndarray:
            A NumPy array is returned when `subplots` is True.
    """
    # Abstract placeholder: this vendored method always raises; a concrete
    # implementation is expected to be supplied elsewhere.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def scatter(
self,
x: typing.Optional[typing.Hashable] = None,
Expand Down