Skip to content

Commit 68cbb5f

Browse files
committed
Add plot.pie in Series
1 parent 7a76730 commit 68cbb5f

File tree

3 files changed

+90
-8
lines changed

3 files changed

+90
-8
lines changed

databricks/koalas/plot.py

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,19 @@
1515
#
1616

1717
from distutils.version import LooseVersion
18+
1819
import matplotlib
1920
import numpy as np
2021
import pandas as pd
21-
2222
from matplotlib.axes._base import _process_plot_format
2323
from pandas.core.dtypes.inference import is_integer, is_list_like
2424
from pandas.io.formats.printing import pprint_thing
2525
from pandas.core.base import PandasObject
26-
27-
from databricks.koalas.missing import _unsupported_function
2826
from pyspark.ml.feature import Bucketizer
2927
from pyspark.sql import functions as F
3028

29+
from databricks.koalas.missing import _unsupported_function
30+
3131

3232
def _gca(rc=None):
3333
import matplotlib.pyplot as plt
@@ -40,10 +40,10 @@ def _get_standard_kind(kind):
4040

4141

4242
if LooseVersion(pd.__version__) < LooseVersion('0.25'):
43-
from pandas.plotting._core import _all_kinds, BarPlot, BoxPlot, HistPlot, MPLPlot
43+
from pandas.plotting._core import _all_kinds, BarPlot, BoxPlot, HistPlot, MPLPlot, PiePlot
4444
else:
4545
from pandas.plotting._core import PlotAccessor
46-
from pandas.plotting._matplotlib import BarPlot, BoxPlot, HistPlot
46+
from pandas.plotting._matplotlib import BarPlot, BoxPlot, HistPlot, PiePlot
4747
from pandas.plotting._matplotlib.core import MPLPlot
4848
_all_kinds = PlotAccessor._all_kinds
4949

@@ -59,7 +59,7 @@ def __init__(self, data, **kwargs):
5959
if len(data) > KoalasBarPlot.max_rows:
6060
self.partial = True
6161
data = data.iloc[:KoalasBarPlot.max_rows]
62-
super().__init__(data, **kwargs)
62+
super(KoalasBarPlot, self).__init__(data, **kwargs)
6363

6464
def _plot(self, ax, x, y, w, start=0, log=False, **kwds):
6565
if self.partial:
@@ -424,7 +424,30 @@ def _compute_plot_data(self):
424424
self.data = summary.calc_histogram(self.bins)
425425

426426

427-
_klasses = [KoalasHistPlot, KoalasBarPlot, KoalasBoxPlot]
427+
class KoalasPiePlot(PiePlot):
428+
max_rows = 1000
429+
430+
def __init__(self, data, **kwargs):
431+
# Simply use the first 1k elements and make it into a pandas dataframe
432+
# For categorical variables, it is likely called from df.x.value_counts().plot.pie()
433+
data = data.head(KoalasPiePlot.max_rows + 1).to_pandas().to_frame()
434+
self.partial = False
435+
if len(data) > KoalasPiePlot.max_rows:
436+
self.partial = True
437+
data = data.iloc[:KoalasPiePlot.max_rows]
438+
super(KoalasPiePlot, self).__init__(data, **kwargs)
439+
440+
def _make_plot(self):
441+
if self.partial:
442+
self._get_ax(0).text(
443+
1, 1, 'showing top 1,000 elements only', size=6, ha='right', va='bottom',
444+
transform=self._get_ax(0).transAxes)
445+
self.data = self.data.iloc[:KoalasPiePlot.max_rows]
446+
447+
super(KoalasPiePlot, self)._make_plot()
448+
449+
450+
_klasses = [KoalasHistPlot, KoalasBarPlot, KoalasBoxPlot, KoalasPiePlot]
428451
_plot_klass = {getattr(klass, '_kind'): klass for klass in _klasses}
429452

430453

@@ -676,4 +699,41 @@ def area(self, **kwds):
676699
return _unsupported_function(class_name='pd.Series', method_name='area')()
677700

678701
def pie(self, **kwds):
679-
return _unsupported_function(class_name='pd.Series', method_name='pie')()
702+
"""
703+
Generate a pie plot.
704+
705+
A pie plot is a proportional representation of the numerical data in a
706+
column. This function wraps :meth:`matplotlib.pyplot.pie` for the
707+
specified column. If no column reference is passed and
708+
``subplots=True``, a pie plot is drawn for each numerical column
709+
independently.
710+
711+
Parameters
712+
----------
713+
y : int or label, optional
714+
Label or position of the column to plot.
715+
If not provided, ``subplots=True`` argument must be passed.
716+
**kwds
717+
Keyword arguments to pass on to :meth:`Koalas.Series.plot`.
718+
719+
Returns
720+
-------
721+
matplotlib.axes.Axes or np.ndarray of them
722+
A NumPy array is returned when `subplots` is True.
723+
724+
725+
Examples
726+
--------
727+
In the example below we have a DataFrame with the information about
728+
planet's mass and radius. We pass the 'mass' column to the
729+
pie function to get a pie plot.
730+
731+
732+
>>> df = ks.DataFrame({'mass': [0.330, 4.87 , 5.97],
733+
... 'radius': [2439.7, 6051.8, 6378.1]},
734+
... index=['Mercury', 'Venus', 'Earth'])
735+
>>> plot = df.mass.plot.pie(figsize=(5, 5))
736+
737+
>>> plot = df.mass.plot.pie(subplots=True, figsize=(6, 3))
738+
"""
739+
return self(kind='pie', **kwds)

databricks/koalas/tests/test_series_plot.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,27 @@ def test_bar_plot_limited(self):
8282

8383
self.compare_plots(ax1, ax2)
8484

85+
def test_pie_plot(self):
86+
pdf = self.pdf1
87+
kdf = self.kdf1
88+
89+
ax1 = pdf['a'].plot.pie(colormap='Paired')
90+
ax2 = kdf['a'].plot.pie(colormap='Paired')
91+
self.compare_plots(ax1, ax2)
92+
93+
def test_pie_plot_limited(self):
94+
pdf = self.pdf2
95+
kdf = self.kdf2
96+
97+
_, ax1 = plt.subplots(1, 1)
98+
ax1 = pdf['id'][:1000].plot.pie(colormap='Paired')
99+
ax1.text(1, 1, 'showing top 1,000 elements only', size=6, ha='right', va='bottom',
100+
transform=ax1.transAxes)
101+
_, ax2 = plt.subplots(1, 1)
102+
ax2 = kdf['id'].plot.pie(colormap='Paired')
103+
104+
self.compare_plots(ax1, ax2)
105+
85106
def test_hist_plot(self):
86107
pdf = self.pdf1
87108
kdf = self.kdf1

docs/source/reference/series.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ specific plotting methods of the form ``Series.plot.<kind>``.
332332
Series.plot.bar
333333
Series.plot.box
334334
Series.plot.hist
335+
Series.plot.pie
335336

336337
.. currentmodule:: databricks.koalas
337338
.. autosummary::

0 commit comments

Comments
 (0)