Skip to content

Commit 0acfa44

Browse files
mcjcode authored and jreback committed
DOC: Explicitly create class definitions of whitelisted SeriesGroupBy and DataFrameGroupBy methods (GH6944)
1 parent 15b8454 commit 0acfa44

File tree

4 files changed

+205
-5
lines changed

4 files changed

+205
-5
lines changed

doc/source/api.rst

+69-1
Original file line numberDiff line numberDiff line change
@@ -1394,12 +1394,80 @@ Computations / Descriptive Stats
13941394
.. autosummary::
13951395
:toctree: generated/
13961396

1397+
GroupBy.count
1398+
GroupBy.cumcount
1399+
GroupBy.first
1400+
GroupBy.head
1401+
GroupBy.last
1402+
GroupBy.max
13971403
GroupBy.mean
13981404
GroupBy.median
1405+
GroupBy.min
1406+
GroupBy.nth
1407+
GroupBy.ohlc
1408+
GroupBy.prod
1409+
GroupBy.size
13991410
GroupBy.sem
14001411
GroupBy.std
1412+
GroupBy.sum
14011413
GroupBy.var
1402-
GroupBy.ohlc
1414+
GroupBy.tail
1415+
1416+
The following methods are available in both ``SeriesGroupBy`` and
1417+
``DataFrameGroupBy`` objects, but may differ slightly, typically in that
1418+
the ``DataFrameGroupBy`` version usually permits the specification of an
1419+
axis argument, and often an argument indicating whether to restrict
1420+
application to columns of a specific data type.
1421+
1422+
.. autosummary::
1423+
:toctree: generated/
1424+
1425+
DataFrameGroupBy.bfill
1426+
DataFrameGroupBy.cummax
1427+
DataFrameGroupBy.cummin
1428+
DataFrameGroupBy.cumprod
1429+
DataFrameGroupBy.cumsum
1430+
DataFrameGroupBy.describe
1431+
DataFrameGroupBy.all
1432+
DataFrameGroupBy.any
1433+
DataFrameGroupBy.corr
1434+
DataFrameGroupBy.cov
1435+
DataFrameGroupBy.diff
1436+
DataFrameGroupBy.ffill
1437+
DataFrameGroupBy.fillna
1438+
DataFrameGroupBy.hist
1439+
DataFrameGroupBy.idxmax
1440+
DataFrameGroupBy.idxmin
1441+
DataFrameGroupBy.irow
1442+
DataFrameGroupBy.mad
1443+
DataFrameGroupBy.pct_change
1444+
DataFrameGroupBy.plot
1445+
DataFrameGroupBy.quantile
1446+
DataFrameGroupBy.rank
1447+
DataFrameGroupBy.resample
1448+
DataFrameGroupBy.shift
1449+
DataFrameGroupBy.skew
1450+
DataFrameGroupBy.take
1451+
DataFrameGroupBy.tshift
1452+
1453+
The following methods are available only for ``SeriesGroupBy`` objects.
1454+
1455+
.. autosummary::
1456+
:toctree: generated/
1457+
1458+
SeriesGroupBy.nlargest
1459+
SeriesGroupBy.nsmallest
1460+
SeriesGroupBy.nunique
1461+
SeriesGroupBy.unique
1462+
SeriesGroupBy.value_counts
1463+
1464+
The following methods are available only for ``DataFrameGroupBy`` objects.
1465+
1466+
.. autosummary::
1467+
:toctree: generated/
1468+
1469+
DataFrameGroupBy.corrwith
1470+
DataFrameGroupBy.boxplot
14031471

14041472
.. currentmodule:: pandas
14051473

pandas/core/groupby.py

+70-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pandas.core.internals import BlockManager, make_block
1919
from pandas.core.series import Series
2020
from pandas.core.panel import Panel
21-
from pandas.util.decorators import cache_readonly, Appender
21+
from pandas.util.decorators import cache_readonly, Appender, make_signature
2222
import pandas.core.algorithms as algos
2323
import pandas.core.common as com
2424
from pandas.core.common import(_possibly_downcast_to_dtype, isnull,
@@ -533,7 +533,7 @@ def wrapper(*args, **kwargs):
533533
# a little trickery for aggregation functions that need an axis
534534
# argument
535535
kwargs_with_axis = kwargs.copy()
536-
if 'axis' not in kwargs_with_axis:
536+
if 'axis' not in kwargs_with_axis or kwargs_with_axis['axis']==None:
537537
kwargs_with_axis['axis'] = self.axis
538538

539539
def curried_with_axis(x):
@@ -2133,8 +2133,72 @@ def _convert_grouper(axis, grouper):
21332133
else:
21342134
return grouper
21352135

2136+
def _whitelist_method_generator(klass, whitelist):
    """
    Yield GroupBy member definitions for the ``klass`` names in ``whitelist``.

    Parameters
    ----------
    klass : class
        Class where the members are defined. Should be Series or DataFrame.
    whitelist : list of str
        Names of ``klass`` attributes for which wrappers are constructed.

    Returns
    -------
    generator of str
        Each string is suitable for exec'ing and defines an implementation
        of one named method (or property) for DataFrameGroupBy or
        SeriesGroupBy.

    Notes
    -----
    Since we don't want to override methods explicitly defined in the
    base class, any name already present on ``GroupBy`` is skipped.
    """
    # Source templates; the generated body simply forwards to the
    # instance's __getattr__ for the same name.
    method_wrapper_template = \
        """def %(name)s(%(sig)s) :
    \"""
    %(doc)s
    \"""
    f = %(self)s.__getattr__('%(name)s')
    return f(%(args)s)"""
    property_wrapper_template = \
        """@property
def %(name)s(self) :
    \"""
    %(doc)s
    \"""
    return self.__getattr__('%(name)s')"""

    for name in whitelist:
        # don't override anything that was explicitly defined
        # in the base class
        if hasattr(GroupBy, name):
            continue

        # ugly, but we need the name string itself in the method.
        f = getattr(klass, name)
        doc = f.__doc__ if isinstance(f.__doc__, str) else ''

        # NOTE(review): on Python 3 a function looked up on its class is a
        # plain function, not a MethodType, so it takes the property
        # branch below -- confirm that is the intended behaviour.
        if isinstance(f, types.MethodType):
            decl, args = make_signature(f)
            # pass args by name to f because otherwise
            # GroupBy._make_wrapper won't know whether
            # we passed in an axis parameter.
            args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
            params = {'name': name,
                      'doc': doc,
                      'sig': ','.join(decl),
                      'self': args[0],
                      'args': ','.join(args_by_name)}
            yield method_wrapper_template % params
        else:
            params = {'name': name, 'doc': doc}
            yield property_wrapper_template % params
2195+
21362196
class SeriesGroupBy(GroupBy):
2197+
#
2198+
# Make class defs of attributes on SeriesGroupBy whitelist
21372199
_apply_whitelist = _series_apply_whitelist
2200+
for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) :
2201+
exec(_def_str)
21382202

21392203
def aggregate(self, func_or_funcs, *args, **kwargs):
21402204
"""
@@ -3045,6 +3109,10 @@ def filter(self, func, dropna=True, *args, **kwargs):
30453109

30463110
class DataFrameGroupBy(NDFrameGroupBy):
30473111
_apply_whitelist = _dataframe_apply_whitelist
3112+
#
3113+
# Make class defs of attributes on DataFrameGroupBy whitelist.
3114+
for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) :
3115+
exec(_def_str)
30483116

30493117
_block_agg_axis = 1
30503118

pandas/tests/test_groupby.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
assert_index_equal, assertRaisesRegexp)
2020
from pandas.compat import(
2121
range, long, lrange, StringIO, lmap, lzip, map,
22-
zip, builtins, OrderedDict
22+
zip, builtins, OrderedDict, product as cart_product
2323
)
2424
from pandas import compat
2525
from pandas.core.panel import Panel
@@ -4327,7 +4327,45 @@ def test_groupby_whitelist(self):
43274327
gb = obj.groupby(df.letters)
43284328
self.assertEqual(whitelist, gb._apply_whitelist)
43294329
for m in whitelist:
4330-
getattr(gb, m)
4330+
getattr(type(gb), m)
4331+
4332+
AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
4333+
'mad', 'std', 'var', 'sem']
4334+
AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad']
4335+
4336+
def test_regression_whitelist_methods(self):

    # GH6944
    # explicitly test the whitelist methods
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                               ['one', 'two', 'three']],
                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    raw_frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
    raw_frame.ix[1, [1, 2]] = np.nan
    raw_frame.ix[7, [0, 1]] = np.nan

    combinations = cart_product(self.AGG_FUNCTIONS,
                                lrange(2), lrange(2),
                                [True, False])
    for op, level, axis, skipna in combinations:

        # group along the rows of the frame, or of its transpose
        frame = raw_frame if axis == 0 else raw_frame.T

        # skipna is only forwarded to the ops listed as accepting it
        if op in self.AGG_FUNCTIONS_WITH_SKIPNA:
            extra_kwargs = {'skipna': skipna}
        else:
            extra_kwargs = {}

        grouped = frame.groupby(level=level, axis=axis)
        result = getattr(grouped, op)(**extra_kwargs)
        expected = getattr(frame, op)(level=level, axis=axis,
                                      **extra_kwargs)
        assert_frame_equal(result, expected)
43314369

43324370
def test_groupby_blacklist(self):
43334371
from string import ascii_lowercase

pandas/util/decorators.py

+26
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,29 @@ def knownfailer(*args, **kwargs):
258258
return nose.tools.make_decorator(f)(knownfailer)
259259

260260
return knownfail_decorator
261+
262+
def make_signature(func):
    """
    Return the named positional parameters of ``func`` as source fragments.

    Only the parameters reported in the arg spec's ``args`` are included;
    ``*args`` / ``**kwargs`` entries are not part of the result.

    Parameters
    ----------
    func : callable
        Function or method to inspect.

    Returns
    -------
    (decl, names) : tuple of (list of str, list of str)
        ``decl`` has one entry per parameter, either ``name`` or
        ``name=repr(default)``; ``names`` has the bare parameter names.

    Examples
    --------
    >>> def f(a, b, c=2):
    ...     return a * b * c
    >>> decl, names = make_signature(f)
    >>> print(','.join(decl))
    a,b,c=2
    >>> names
    ['a', 'b', 'c']
    """
    # getargspec no longer exists on recent Python 3 releases; fall back
    # to it only where getfullargspec is unavailable (Python 2).
    try:
        from inspect import getfullargspec as _get_spec
    except ImportError:
        from inspect import getargspec as _get_spec

    spec = _get_spec(func)
    defaults = spec.defaults or ()

    # Defaults always belong to the trailing parameters, so split by
    # position instead of using a sentinel value: a sentinel would be
    # confused with a genuine default equal to it (e.g. an empty string).
    n_wo_defaults = len(spec.args) - len(defaults)
    args = list(spec.args[:n_wo_defaults])
    for var, default in zip(spec.args[n_wo_defaults:], defaults):
        args.append(var + '=' + repr(default))
    return args, spec.args
286+

0 commit comments

Comments
 (0)