Skip to content

Groupbydocs #8231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,110 @@ Computations / Descriptive Stats
GroupBy.std
GroupBy.var
GroupBy.ohlc
GroupBy.nth
GroupBy.prod
GroupBy.size

DataFrameGroupBy
----------------
.. currentmodule:: pandas.core.groupby

A DataFrameGroupBy object is returned by :func:`pandas.DataFrame.groupby`.

Computations / Descriptive Stats
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated/

DataFrameGroupBy.all
DataFrameGroupBy.any
DataFrameGroupBy.bfill
DataFrameGroupBy.boxplot
DataFrameGroupBy.corr
DataFrameGroupBy.corrwith
DataFrameGroupBy.count
DataFrameGroupBy.cov
DataFrameGroupBy.cumcount
DataFrameGroupBy.cummax
DataFrameGroupBy.cummin
DataFrameGroupBy.cumprod
DataFrameGroupBy.cumsum
DataFrameGroupBy.describe
DataFrameGroupBy.diff
DataFrameGroupBy.ffill
DataFrameGroupBy.fillna
DataFrameGroupBy.first
DataFrameGroupBy.head
DataFrameGroupBy.hist
DataFrameGroupBy.idxmax
DataFrameGroupBy.idxmin
DataFrameGroupBy.irow
DataFrameGroupBy.last
DataFrameGroupBy.mad
DataFrameGroupBy.max
DataFrameGroupBy.min
DataFrameGroupBy.pct_change
DataFrameGroupBy.plot
DataFrameGroupBy.quantile
DataFrameGroupBy.rank
DataFrameGroupBy.resample
DataFrameGroupBy.shift
DataFrameGroupBy.skew
DataFrameGroupBy.sum
DataFrameGroupBy.tail
DataFrameGroupBy.take
DataFrameGroupBy.tshift

SeriesGroupBy
-------------

A SeriesGroupBy object is returned by :func:`pandas.Series.groupby`.

Computations / Descriptive Stats
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated/

SeriesGroupBy.all
SeriesGroupBy.any
SeriesGroupBy.bfill
SeriesGroupBy.corr
SeriesGroupBy.count
SeriesGroupBy.cov
SeriesGroupBy.cumcount
SeriesGroupBy.cummax
SeriesGroupBy.cummin
SeriesGroupBy.cumprod
SeriesGroupBy.cumsum
SeriesGroupBy.describe
SeriesGroupBy.diff
SeriesGroupBy.dtype
SeriesGroupBy.ffill
SeriesGroupBy.fillna
SeriesGroupBy.first
SeriesGroupBy.head
SeriesGroupBy.hist
SeriesGroupBy.idxmax
SeriesGroupBy.idxmin
SeriesGroupBy.irow
SeriesGroupBy.last
SeriesGroupBy.mad
SeriesGroupBy.max
SeriesGroupBy.min
SeriesGroupBy.nunique
SeriesGroupBy.pct_change
SeriesGroupBy.plot
SeriesGroupBy.quantile
SeriesGroupBy.rank
SeriesGroupBy.resample
SeriesGroupBy.shift
SeriesGroupBy.skew
SeriesGroupBy.sum
SeriesGroupBy.tail
SeriesGroupBy.take
SeriesGroupBy.tshift
SeriesGroupBy.unique
SeriesGroupBy.value_counts

.. currentmodule:: pandas

Expand Down
97 changes: 96 additions & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def wrapper(*args, **kwargs):
# a little trickery for aggregation functions that need an axis
# argument
kwargs_with_axis = kwargs.copy()
if 'axis' not in kwargs_with_axis:
if 'axis' not in kwargs_with_axis or kwargs_with_axis['axis']==None:
kwargs_with_axis['axis'] = self.axis

def curried_with_axis(x):
Expand Down Expand Up @@ -2132,8 +2132,99 @@ def _convert_grouper(axis, grouper):
else:
return grouper

from inspect import getargspec
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this function inside _whitelist_method_generator? It's only called from there, yes? (And it's cleaner that way.)

def _make_signature(func) :
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you move this to pandas/utils/decorators.py? or somewhat similar. doesn't really belong in here as its generic and not related to groupby.

"""
Returns a string repr of the arg list of a func call, with any defaults

Examples
--------

>>> def f(a,b,c=2) :
>>> return a*b*c
>>> print(_make_signature(f))
a,b,c=2
"""
spec = getargspec(func)
if spec.defaults == None :
n_wo_defaults = len(spec.args)
defaults = ('',) * n_wo_defaults
else :
n_wo_defaults = len(spec.args) - len(spec.defaults)
defaults = ('',) * n_wo_defaults + spec.defaults
args = []
for i, (var, default) in enumerate(zip(spec.args, defaults)) :
args.append(var if default=='' else var+'='+repr(default))
return args, spec.args

def _whitelist_method_generator(klass, whitelist) :
"""
Yields all GroupBy member defs for DataFrame/Series names in _whitelist.

Parameters
----------
klass - class where members are defined. Should be Series or DataFrame

whitelist - list of names of klass methods to be constructed

Returns
-------
The generator yields a sequence of strings, each suitable for exec'ing,
that define implementations of the named methods for DataFrameGroupBy
or SeriesGroupBy.

Since we don't want to override methods explicitly defined in the
base class, any such name is skipped.
"""

method_wrapper_template = \
"""def %(name)s(%(sig)s) :
\"""
%(doc)s
\"""
f = %(self)s.__getattr__('%(name)s')
return f(%(args)s)
"""
property_wrapper_template = \
"""@property
def %(name)s(self) :
\"""
%(doc)s
\"""
return self.__getattr__('%(name)s')
"""
for name in whitelist :
# don't override anything that was explicitly defined
# in the base class
if hasattr(GroupBy,name) :
continue
# ugly, but we need the name string itself in the method.
f = getattr(klass,name)
doc = f.__doc__
doc = doc if type(doc)==str else ''
if type(f) == types.MethodType :
wrapper_template = method_wrapper_template
decl, args = _make_signature(f)
# pass args by name to f because otherwise
# GroupBy._make_wrapper won't know whether
# we passed in an axis parameter.
args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
params = {'name':name,
'doc':doc,
'sig':','.join(decl),
'self':args[0],
'args':','.join(args_by_name)}
else :
wrapper_template = property_wrapper_template
params = {'name':name, 'doc':doc}
yield wrapper_template % params

class SeriesGroupBy(GroupBy):
#
# Make class defs of attributes on SeriesGroupBy whitelist
_apply_whitelist = _series_apply_whitelist
for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) :
exec(_def_str)

def aggregate(self, func_or_funcs, *args, **kwargs):
"""
Expand Down Expand Up @@ -3044,6 +3135,10 @@ def filter(self, func, dropna=True, *args, **kwargs):

class DataFrameGroupBy(NDFrameGroupBy):
_apply_whitelist = _dataframe_apply_whitelist
#
# Make class defs of attributes on DataFrameGroupBy whitelist.
for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) :
exec(_def_str)

_block_agg_axis = 1

Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -4310,6 +4310,10 @@ def test_groupby_whitelist(self):
self.assertEqual(whitelist, gb._apply_whitelist)
for m in whitelist:
getattr(gb, m)
# Also make sure that the class itself has
# the method defined (dtypes is not a method)
if m not in ['dtypes'] :
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this here?

self.assertTrue(hasattr(type(gb), m))

def test_groupby_blacklist(self):
from string import ascii_lowercase
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/test_groupby_whitelist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# pylint: disable-msg=W0612,E1101,W0141
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just put these tests in pandas/tests/test_groupby.py rather than adding a new test file (you can keep them in a new class, as you are doing here).

import datetime
import nose

from numpy.random import randn
import numpy as np

from pandas.core.index import Index, MultiIndex
from pandas import Panel, DataFrame, Series, notnull, isnull

from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
assert_frame_equal,
assertRaisesRegexp)
import pandas.core.common as com
import pandas.util.testing as tm
from pandas.compat import (range, lrange, StringIO, lzip, u,
product as cart_product, zip)
import pandas as pd

import pandas.index as _index


class TestNewGroupByAttr(tm.TestCase):
    """Compare groupby-dispatched aggregations with their frame-level form."""

    _multiprocess_can_split_ = True

    def setUp(self):
        """Build a 10x3 random frame on a two-level index with a few NaNs."""
        import warnings
        warnings.filterwarnings(action='ignore', category=FutureWarning)

        first_level = ['foo', 'bar', 'baz', 'qux']
        second_level = ['one', 'two', 'three']
        index = MultiIndex(levels=[first_level, second_level],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        columns = Index(['A', 'B', 'C'], name='exp')
        self.frame = DataFrame(np.random.randn(10, 3), index=index,
                               columns=columns)

        # punch holes into the data so that skipna has something to act on
        self.frame.ix[1, [1, 2]] = np.nan
        self.frame.ix[7, [0, 1]] = np.nan

    # aggregation names exercised by test_newattr
    AGG_FUNCTIONS = ['skew', 'mad']

    def test_newattr(self):
        """
        For every (op, level, axis, skipna) combination, calling the op on
        the grouped object must equal calling it on the frame with ``level``.
        """
        combos = cart_product(self.AGG_FUNCTIONS,
                              lrange(2), lrange(2),
                              [True, False])
        for op, level, axis, skipna in combos:
            # for axis=1 the MultiIndex has to sit on the columns
            frame = self.frame if axis == 0 else self.frame.T

            grouped = frame.groupby(level=level, axis=axis)
            result = getattr(grouped, op)(skipna=skipna)
            expected = getattr(frame, op)(level=level, axis=axis,
                                          skipna=skipna)
            assert_frame_equal(result, expected)

if __name__ == '__main__':
    # Allow running this test module directly through nose.
    import nose

    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)