1515#
1616
1717from distutils .version import LooseVersion
18+
1819import matplotlib
1920import numpy as np
2021import pandas as pd
21-
2222from matplotlib .axes ._base import _process_plot_format
2323from pandas .core .dtypes .inference import is_integer , is_list_like
2424from pandas .io .formats .printing import pprint_thing
2525from pandas .core .base import PandasObject
26-
27- from databricks .koalas .missing import _unsupported_function
2826from pyspark .ml .feature import Bucketizer
2927from pyspark .sql import functions as F
3028
29+ from databricks .koalas .missing import _unsupported_function
30+
3131
3232def _gca (rc = None ):
3333 import matplotlib .pyplot as plt
@@ -40,10 +40,10 @@ def _get_standard_kind(kind):
4040
4141
# The plot base classes are imported from different private pandas modules
# depending on the installed pandas version; ``_all_kinds`` is taken from
# ``PlotAccessor`` on the newer layout.
if LooseVersion(pd.__version__) < LooseVersion('0.25'):
    from pandas.plotting._core import (
        _all_kinds, BarPlot, BoxPlot, HistPlot, MPLPlot, PiePlot)
else:
    from pandas.plotting._core import PlotAccessor
    from pandas.plotting._matplotlib import BarPlot, BoxPlot, HistPlot, PiePlot
    from pandas.plotting._matplotlib.core import MPLPlot
    _all_kinds = PlotAccessor._all_kinds
4949
@@ -59,7 +59,7 @@ def __init__(self, data, **kwargs):
5959 if len (data ) > KoalasBarPlot .max_rows :
6060 self .partial = True
6161 data = data .iloc [:KoalasBarPlot .max_rows ]
62- super ().__init__ (data , ** kwargs )
62+ super (KoalasBarPlot , self ).__init__ (data , ** kwargs )
6363
6464 def _plot (self , ax , x , y , w , start = 0 , log = False , ** kwds ):
6565 if self .partial :
@@ -424,7 +424,30 @@ def _compute_plot_data(self):
424424 self .data = summary .calc_histogram (self .bins )
425425
426426
427- _klasses = [KoalasHistPlot , KoalasBarPlot , KoalasBoxPlot ]
class KoalasPiePlot(PiePlot):
    """Pie plot that draws at most ``max_rows`` elements of its input.

    The input is cut down to a bounded number of rows and converted to a
    pandas DataFrame before being handed to the parent ``PiePlot``. When the
    input held more rows than the limit, a small notice is drawn on the axes.
    """

    # Maximum number of elements collected and drawn.
    max_rows = 1000

    def __init__(self, data, **kwargs):
        """Truncate ``data``, convert it to pandas and initialise the parent.

        Parameters
        ----------
        data
            Object supporting ``head(n).to_pandas().to_frame()``
            (presumably a Koalas Series -- confirm against callers).
        **kwargs
            Passed through unchanged to the parent constructor.
        """
        # Simply use the first `max_rows` elements and make it into a pandas
        # dataframe. For categorical variables, it is likely called from
        # df.x.value_counts().plot.pie()
        limit = KoalasPiePlot.max_rows
        # Fetch one row more than the limit so truncation can be detected.
        data = data.head(limit + 1).to_pandas().to_frame()
        self.partial = len(data) > limit
        if self.partial:
            data = data.iloc[:limit]
        super(KoalasPiePlot, self).__init__(data, **kwargs)

    def _make_plot(self):
        """Draw the truncation notice if needed, then delegate to the parent."""
        if self.partial:
            limit = KoalasPiePlot.max_rows
            ax = self._get_ax(0)
            # Derive the notice from `max_rows` so the text cannot drift from
            # the actual limit; renders as '1,000' for the default value.
            ax.text(
                1, 1, 'showing top {:,} elements only'.format(limit),
                size=6, ha='right', va='bottom', transform=ax.transAxes)
            # Presumably already truncated in __init__; kept as a safeguard.
            self.data = self.data.iloc[:limit]

        super(KoalasPiePlot, self)._make_plot()
449+
# Koalas plot implementations, and a lookup table from each class's ``_kind``
# attribute to the class itself.
_klasses = [KoalasHistPlot, KoalasBarPlot, KoalasBoxPlot, KoalasPiePlot]
_plot_klass = {klass._kind: klass for klass in _klasses}
429452
430453
@@ -676,4 +699,41 @@ def area(self, **kwds):
676699 return _unsupported_function (class_name = 'pd.Series' , method_name = 'area' )()
677700
def pie(self, **kwds):
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in
    the Series. This method calls the plot accessor itself with
    ``kind='pie'`` and forwards every keyword argument unchanged.

    Parameters
    ----------
    **kwds
        Keyword arguments forwarded to the accessor call, for example
        ``figsize``. ``kind`` must not be supplied, because it is
        already fixed to ``'pie'`` here.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        The result of the accessor call. A NumPy array is returned when
        `subplots` is True.

    Examples
    --------
    In the example below we have a DataFrame with information about
    the mass and radius of planets. We select the 'mass' column and call
    its pie function to get a pie plot.

    >>> df = ks.DataFrame({'mass': [0.330, 4.87 , 5.97],
    ...                    'radius': [2439.7, 6051.8, 6378.1]},
    ...                   index=['Mercury', 'Venus', 'Earth'])
    >>> plot = df.mass.plot.pie(figsize=(5, 5))

    >>> plot = df.mass.plot.pie(subplots=True, figsize=(6, 3))
    """
    return self(kind='pie', **kwds)
0 commit comments