diff --git a/doc/source/api.rst b/doc/source/api.rst index 242ce9865dc9a..f831b97d2033f 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1394,12 +1394,80 @@ Computations / Descriptive Stats .. autosummary:: :toctree: generated/ + GroupBy.count + GroupBy.cumcount + GroupBy.first + GroupBy.head + GroupBy.last + GroupBy.max GroupBy.mean GroupBy.median + GroupBy.min + GroupBy.nth + GroupBy.ohlc + GroupBy.prod + GroupBy.size GroupBy.sem GroupBy.std + GroupBy.sum GroupBy.var - GroupBy.ohlc + GroupBy.tail + +The following methods are available in both ``SeriesGroupBy`` and +``DataFrameGroupBy`` objects, but may differ slightly, usually in that +the ``DataFrameGroupBy`` version permits the specification of an +axis argument, and often an argument indicating whether to restrict +application to columns of a specific data type. + +.. autosummary:: + :toctree: generated/ + + DataFrameGroupBy.bfill + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumprod + DataFrameGroupBy.cumsum + DataFrameGroupBy.describe + DataFrameGroupBy.all + DataFrameGroupBy.any + DataFrameGroupBy.corr + DataFrameGroupBy.cov + DataFrameGroupBy.diff + DataFrameGroupBy.ffill + DataFrameGroupBy.fillna + DataFrameGroupBy.hist + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.irow + DataFrameGroupBy.mad + DataFrameGroupBy.pct_change + DataFrameGroupBy.plot + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.resample + DataFrameGroupBy.shift + DataFrameGroupBy.skew + DataFrameGroupBy.take + DataFrameGroupBy.tshift + +The following methods are available only for ``SeriesGroupBy`` objects. + +.. autosummary:: + :toctree: generated/ + + SeriesGroupBy.nlargest + SeriesGroupBy.nsmallest + SeriesGroupBy.nunique + SeriesGroupBy.unique + SeriesGroupBy.value_counts + +The following methods are available only for ``DataFrameGroupBy`` objects. + +.. 
autosummary:: + :toctree: generated/ + + DataFrameGroupBy.corrwith + DataFrameGroupBy.boxplot .. currentmodule:: pandas diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 26ef375934ac9..02e36c30e962b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -18,7 +18,7 @@ from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series from pandas.core.panel import Panel -from pandas.util.decorators import cache_readonly, Appender +from pandas.util.decorators import cache_readonly, Appender, make_signature import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.common import(_possibly_downcast_to_dtype, isnull, @@ -533,7 +533,7 @@ def wrapper(*args, **kwargs): # a little trickery for aggregation functions that need an axis # argument kwargs_with_axis = kwargs.copy() - if 'axis' not in kwargs_with_axis: + if 'axis' not in kwargs_with_axis or kwargs_with_axis['axis']==None: kwargs_with_axis['axis'] = self.axis def curried_with_axis(x): @@ -2133,9 +2133,73 @@ def _convert_grouper(axis, grouper): else: return grouper +def _whitelist_method_generator(klass, whitelist) : + """ + Yields all GroupBy member defs for DataFrame/Series names in _whitelist. + + Parameters + ---------- + klass - class where members are defined. Should be Series or DataFrame + + whitelist - list of names of klass methods to be constructed + + Returns + ------- + The generator yields a sequence of strings, each suitable for exec'ing, + that define implementations of the named methods for DataFrameGroupBy + or SeriesGroupBy. + + Since we don't want to override methods explicitly defined in the + base class, any such name is skipped. 
+ """ + + method_wrapper_template = \ + """def %(name)s(%(sig)s) : + \""" + %(doc)s + \""" + f = %(self)s.__getattr__('%(name)s') + return f(%(args)s)""" + property_wrapper_template = \ + """@property +def %(name)s(self) : + \""" + %(doc)s + \""" + return self.__getattr__('%(name)s')""" + for name in whitelist : + # don't override anything that was explicitly defined + # in the base class + if hasattr(GroupBy,name) : + continue + # ugly, but we need the name string itself in the method. + f = getattr(klass,name) + doc = f.__doc__ + doc = doc if type(doc)==str else '' + if type(f) == types.MethodType : + wrapper_template = method_wrapper_template + decl, args = make_signature(f) + # pass args by name to f because otherwise + # GroupBy._make_wrapper won't know whether + # we passed in an axis parameter. + args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]] + params = {'name':name, + 'doc':doc, + 'sig':','.join(decl), + 'self':args[0], + 'args':','.join(args_by_name)} + else : + wrapper_template = property_wrapper_template + params = {'name':name, 'doc':doc} + yield wrapper_template % params + class SeriesGroupBy(GroupBy): + # + # Make class defs of attributes on SeriesGroupBy whitelist _apply_whitelist = _series_apply_whitelist - + for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) : + exec(_def_str) + def aggregate(self, func_or_funcs, *args, **kwargs): """ Apply aggregation function or functions to groups, yielding most likely @@ -3045,6 +3109,10 @@ def filter(self, func, dropna=True, *args, **kwargs): class DataFrameGroupBy(NDFrameGroupBy): _apply_whitelist = _dataframe_apply_whitelist + # + # Make class defs of attributes on DataFrameGroupBy whitelist. 
+ for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) : + exec(_def_str) _block_agg_axis = 1 diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index f34ac434ef9d7..ab6363b705aa5 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -19,7 +19,7 @@ assert_index_equal, assertRaisesRegexp) from pandas.compat import( range, long, lrange, StringIO, lmap, lzip, map, - zip, builtins, OrderedDict + zip, builtins, OrderedDict, product as cart_product ) from pandas import compat from pandas.core.panel import Panel @@ -4327,7 +4327,43 @@ def test_groupby_whitelist(self): gb = obj.groupby(df.letters) self.assertEqual(whitelist, gb._apply_whitelist) for m in whitelist: - getattr(gb, m) + getattr(type(gb), m) + + AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', + 'mad', 'std', 'var', 'sem'] + AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] + + def test_regression_whitelist_methods(self) : + + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + raw_frame = DataFrame(np.random.randn(10, 3), index=index, + columns=Index(['A', 'B', 'C'], name='exp')) + raw_frame.ix[1, [1, 2]] = np.nan + raw_frame.ix[7, [0, 1]] = np.nan + + for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, + lrange(2), lrange(2), + [True,False]) : + + if axis == 0 : + frame = raw_frame + else : + frame = raw_frame.T + + if op in self.AGG_FUNCTIONS_WITH_SKIPNA : + grouped = frame.groupby(level=level,axis=axis) + result = getattr(grouped,op)(skipna=skipna) + expected = getattr(frame,op)(level=level,axis=axis,skipna=skipna) + assert_frame_equal(result, expected) + else : + grouped = frame.groupby(level=level,axis=axis) + result = getattr(grouped,op)() + expected = getattr(frame,op)(level=level,axis=axis) + assert_frame_equal(result, expected) def test_groupby_blacklist(self): from 
def make_signature(func):
    """
    Describe the positional/keyword parameters of ``func``.

    Parameters
    ----------
    func : callable
        A Python-level function or method that ``inspect`` can introspect.

    Returns
    -------
    (declarations, names) : tuple of two lists of str
        ``declarations`` holds one entry per parameter, rendered the way it
        would appear in a ``def`` statement: ``'name'`` when the parameter
        has no default and ``'name=<repr(default)>'`` when it has one.
        ``names`` holds the bare parameter names in the same order.
        ``*args`` / ``**kwargs`` style parameters are not included.

    Examples
    --------
    >>> def f(a, b, c=2):
    ...     return a * b * c
    >>> make_signature(f)
    (['a', 'b', 'c=2'], ['a', 'b', 'c'])
    """
    # getfullargspec also copes with annotations and keyword-only
    # parameters; fall back to getargspec where it does not exist.
    try:
        from inspect import getfullargspec as _argspec
    except ImportError:
        from inspect import getargspec as _argspec

    spec = _argspec(func)
    names = list(spec.args)
    given_defaults = () if spec.defaults is None else tuple(spec.defaults)

    # Defaults align with the *trailing* parameters. Pad the front with a
    # unique sentinel so that legitimate defaults such as '' or None are
    # never mistaken for "no default".
    no_default = object()
    padded = (no_default,) * (len(names) - len(given_defaults)) + given_defaults

    declarations = [
        name if default is no_default else name + '=' + repr(default)
        for name, default in zip(names, padded)
    ]
    return declarations, names