
Groupbydocs #8231

Closed
wants to merge 11 commits into from
70 changes: 69 additions & 1 deletion doc/source/api.rst
@@ -1394,12 +1394,80 @@ Computations / Descriptive Stats
.. autosummary::
:toctree: generated/

GroupBy.count
GroupBy.cumcount
GroupBy.first
GroupBy.head
GroupBy.last
GroupBy.max
GroupBy.mean
GroupBy.median
GroupBy.min
GroupBy.nth
GroupBy.ohlc
GroupBy.prod
GroupBy.sem
GroupBy.size
GroupBy.std
GroupBy.sum
GroupBy.tail
GroupBy.var

The following methods are available in both ``SeriesGroupBy`` and
``DataFrameGroupBy`` objects, but may differ slightly: the
``DataFrameGroupBy`` version usually permits an ``axis`` argument, and
often an argument indicating whether to restrict application to columns
of a specific data type. A short sketch of the distinction follows the
list below.

.. autosummary::
:toctree: generated/

DataFrameGroupBy.all
DataFrameGroupBy.any
DataFrameGroupBy.bfill
DataFrameGroupBy.corr
DataFrameGroupBy.cov
DataFrameGroupBy.cummax
DataFrameGroupBy.cummin
DataFrameGroupBy.cumprod
DataFrameGroupBy.cumsum
DataFrameGroupBy.describe
DataFrameGroupBy.diff
DataFrameGroupBy.ffill
DataFrameGroupBy.fillna
DataFrameGroupBy.hist
DataFrameGroupBy.idxmax
DataFrameGroupBy.idxmin
DataFrameGroupBy.irow
DataFrameGroupBy.mad
DataFrameGroupBy.pct_change
DataFrameGroupBy.plot
DataFrameGroupBy.quantile
DataFrameGroupBy.rank
DataFrameGroupBy.resample
DataFrameGroupBy.shift
DataFrameGroupBy.skew
DataFrameGroupBy.take
DataFrameGroupBy.tshift
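
As a rough illustration of the distinction described above (a sketch with
made-up data; exact keyword support varies across pandas versions):

import pandas as pd

df = pd.DataFrame({'key': ['a', 'a', 'b'],
                   'x': [1.0, 2.0, 3.0],
                   'y': [4.0, 5.0, 6.0]})

# SeriesGroupBy: a single column is selected, so there is no axis to choose
s_cumsum = df.groupby('key')['x'].cumsum()

# DataFrameGroupBy: the same method, but an axis argument is accepted
frame_cumsum = df.groupby('key')[['x', 'y']].cumsum(axis=0)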

The following methods are available only for ``SeriesGroupBy`` objects
(a brief usage sketch follows the list).

.. autosummary::
:toctree: generated/

SeriesGroupBy.nlargest
SeriesGroupBy.nsmallest
SeriesGroupBy.nunique
SeriesGroupBy.unique
SeriesGroupBy.value_counts
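
For instance (a sketch only; availability of these methods depends on the
pandas version):

import pandas as pd

s = pd.Series([1, 3, 2, 5, 4, 6], index=['a', 'a', 'a', 'b', 'b', 'b'])

# the two largest values within each group
s.groupby(level=0).nlargest(2)

# how often each value occurs within each group
s.groupby(level=0).value_counts()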

The following methods are available only for ``DataFrameGroupBy`` objects
(a brief usage sketch follows the list).

.. autosummary::
:toctree: generated/

DataFrameGroupBy.corrwith
DataFrameGroupBy.boxplot
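
And for the ``DataFrameGroupBy``-only pair (again a sketch, not part of the
patch; ``boxplot`` needs a plotting backend such as matplotlib):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(6, 2), columns=['x', 'y'])
df['key'] = ['a', 'a', 'a', 'b', 'b', 'b']
other = pd.DataFrame(np.random.randn(6, 2), columns=['x', 'y'])

df.groupby('key').corrwith(other)   # per-group column-wise correlations with other
df.groupby('key').boxplot()         # one boxplot panel per group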

.. currentmodule:: pandas

74 changes: 71 additions & 3 deletions pandas/core/groupby.py
@@ -18,7 +18,7 @@
from pandas.core.internals import BlockManager, make_block
from pandas.core.series import Series
from pandas.core.panel import Panel
from pandas.util.decorators import cache_readonly, Appender
from pandas.util.decorators import cache_readonly, Appender, make_signature
import pandas.core.algorithms as algos
import pandas.core.common as com
from pandas.core.common import(_possibly_downcast_to_dtype, isnull,
@@ -533,7 +533,7 @@ def wrapper(*args, **kwargs):
# a little trickery for aggregation functions that need an axis
# argument
kwargs_with_axis = kwargs.copy()
if 'axis' not in kwargs_with_axis:
if 'axis' not in kwargs_with_axis or kwargs_with_axis['axis'] is None:
    kwargs_with_axis['axis'] = self.axis

def curried_with_axis(x):
@@ -2133,9 +2133,73 @@ def _convert_grouper(axis, grouper):
else:
return grouper

def _whitelist_method_generator(klass, whitelist):
    """
    Yields all GroupBy member defs for DataFrame/Series names in _whitelist.

    Parameters
    ----------
    klass : class
        Class where members are defined.  Should be Series or DataFrame.
    whitelist : list
        List of names of klass methods to be constructed.

    Returns
    -------
    The generator yields a sequence of strings, each suitable for exec'ing,
    that define implementations of the named methods for DataFrameGroupBy
    or SeriesGroupBy.

    Since we don't want to override methods explicitly defined in the
    base class, any such name is skipped.
    """

    method_wrapper_template = \
        """def %(name)s(%(sig)s):
    \"""
    %(doc)s
    \"""
    f = %(self)s.__getattr__('%(name)s')
    return f(%(args)s)"""
    property_wrapper_template = \
        """@property
def %(name)s(self):
    \"""
    %(doc)s
    \"""
    return self.__getattr__('%(name)s')"""
    for name in whitelist:
        # don't override anything that was explicitly defined
        # in the base class
        if hasattr(GroupBy, name):
            continue
        # ugly, but we need the name string itself in the method.
        f = getattr(klass, name)
        doc = f.__doc__
        doc = doc if type(doc) == str else ''
        if type(f) == types.MethodType:
            wrapper_template = method_wrapper_template
            decl, args = make_signature(f)
            # pass args by name to f because otherwise
            # GroupBy._make_wrapper won't know whether
            # we passed in an axis parameter.
            args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
            params = {'name': name,
                      'doc': doc,
                      'sig': ','.join(decl),
                      'self': args[0],
                      'args': ','.join(args_by_name)}
        else:
            wrapper_template = property_wrapper_template
            params = {'name': name, 'doc': doc}
        yield wrapper_template % params
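
To make the generator's intent concrete, here is a minimal standalone sketch
of the same technique (toy class and names, not pandas code): build source
text for a forwarding method and exec it inside a class body, so that the
generated function lands in the class namespace.

class Source(object):
    def greet(self, name, punct='!'):
        "Return a greeting."
        return 'hello ' + name + punct

_template = (
    "def %(name)s(self, *args, **kwargs):\n"
    "    \"\"\"%(doc)s\"\"\"\n"
    "    return getattr(self._obj, '%(name)s')(*args, **kwargs)"
)

class Wrapper(object):
    def __init__(self, obj):
        self._obj = obj
    # exec() inside a class body writes the generated function into the
    # class namespace, just like the whitelist loops in the classes below.
    for _name in ['greet']:
        exec(_template % {'name': _name,
                          'doc': getattr(Source, _name).__doc__})

print(Wrapper(Source()).greet('world'))   # -> hello world!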

class SeriesGroupBy(GroupBy):
    #
    # Make class defs of attributes on SeriesGroupBy whitelist
    _apply_whitelist = _series_apply_whitelist
    for _def_str in _whitelist_method_generator(Series, _series_apply_whitelist):
        exec(_def_str)

    def aggregate(self, func_or_funcs, *args, **kwargs):
        """
        Apply aggregation function or functions to groups, yielding most likely
@@ -3045,6 +3109,10 @@ def filter(self, func, dropna=True, *args, **kwargs):

class DataFrameGroupBy(NDFrameGroupBy):
    _apply_whitelist = _dataframe_apply_whitelist
    #
    # Make class defs of attributes on DataFrameGroupBy whitelist.
    for _def_str in _whitelist_method_generator(DataFrame, _apply_whitelist):
        exec(_def_str)

    _block_agg_axis = 1

40 changes: 38 additions & 2 deletions pandas/tests/test_groupby.py
@@ -19,7 +19,7 @@
assert_index_equal, assertRaisesRegexp)
from pandas.compat import(
range, long, lrange, StringIO, lmap, lzip, map,
zip, builtins, OrderedDict
zip, builtins, OrderedDict, product as cart_product
)
from pandas import compat
from pandas.core.panel import Panel
@@ -4327,7 +4327,43 @@ def test_groupby_whitelist(self):
gb = obj.groupby(df.letters)
self.assertEqual(whitelist, gb._apply_whitelist)
for m in whitelist:
getattr(gb, m)
getattr(type(gb), m)

AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
                 'mad', 'std', 'var', 'sem']
AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad']

def test_regression_whitelist_methods(self):
Contributor comment: what is this testing?


    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                               ['one', 'two', 'three']],
                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    raw_frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
    raw_frame.ix[1, [1, 2]] = np.nan
    raw_frame.ix[7, [0, 1]] = np.nan

    for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS,
                                                lrange(2), lrange(2),
                                                [True, False]):

        if axis == 0:
            frame = raw_frame
        else:
            frame = raw_frame.T

        if op in self.AGG_FUNCTIONS_WITH_SKIPNA:
            grouped = frame.groupby(level=level, axis=axis)
            result = getattr(grouped, op)(skipna=skipna)
            expected = getattr(frame, op)(level=level, axis=axis,
                                          skipna=skipna)
            assert_frame_equal(result, expected)
        else:
            grouped = frame.groupby(level=level, axis=axis)
            result = getattr(grouped, op)()
            expected = getattr(frame, op)(level=level, axis=axis)
            assert_frame_equal(result, expected)

def test_groupby_blacklist(self):
from string import ascii_lowercase
26 changes: 26 additions & 0 deletions pandas/util/decorators.py
@@ -258,3 +258,29 @@ def knownfailer(*args, **kwargs):
return nose.tools.make_decorator(f)(knownfailer)

return knownfail_decorator

def make_signature(func):
    """
    Returns a tuple ``(decl, args)`` describing the argument list of ``func``:
    ``decl`` lists the argument declarations including any defaults, and
    ``args`` lists the bare argument names.

    Examples
    --------

    >>> def f(a, b, c=2):
    ...     return a * b * c
    >>> make_signature(f)
    (['a', 'b', 'c=2'], ['a', 'b', 'c'])
    """
    from inspect import getargspec
    spec = getargspec(func)
    if spec.defaults is None:
        n_wo_defaults = len(spec.args)
        defaults = ('',) * n_wo_defaults
    else:
        n_wo_defaults = len(spec.args) - len(spec.defaults)
        defaults = ('',) * n_wo_defaults + spec.defaults
    args = []
    for i, (var, default) in enumerate(zip(spec.args, defaults)):
        args.append(var if default == '' else var + '=' + repr(default))
    return args, spec.args
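
A quick illustration of the return value (not part of the patch; the output
shown is what the function above would produce for this toy example):

def g(a, b, c=2, d=None):
    pass

decl, args = make_signature(g)
# decl -> ['a', 'b', 'c=2', 'd=None']  : declarations with defaults, ready to join
# args -> ['a', 'b', 'c', 'd']         : bare argument names
print('def g(%s):' % ','.join(decl))   # -> def g(a,b,c=2,d=None):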