-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Groupbydocs #8231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Groupbydocs #8231
Changes from 2 commits
ed8145b
9d910f2
a77bc2d
70ecbaa
1b4536b
d076afe
a12bef6
6d31eb7
cab5bb0
295120b
0eb9023
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -533,7 +533,7 @@ def wrapper(*args, **kwargs): | |
# a little trickery for aggregation functions that need an axis | ||
# argument | ||
kwargs_with_axis = kwargs.copy() | ||
if 'axis' not in kwargs_with_axis: | ||
if 'axis' not in kwargs_with_axis or kwargs_with_axis['axis']==None: | ||
kwargs_with_axis['axis'] = self.axis | ||
|
||
def curried_with_axis(x): | ||
|
@@ -2132,8 +2132,99 @@ def _convert_grouper(axis, grouper): | |
else: | ||
return grouper | ||
|
||
from inspect import getargspec | ||
def _make_signature(func) : | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you move this to |
||
""" | ||
Returns a string repr of the arg list of a func call, with any defaults | ||
|
||
Examples | ||
-------- | ||
|
||
>>> def f(a,b,c=2) : | ||
>>> return a*b*c | ||
>>> print(_make_signature(f)) | ||
a,b,c=2 | ||
""" | ||
spec = getargspec(func) | ||
if spec.defaults == None : | ||
n_wo_defaults = len(spec.args) | ||
defaults = ('',) * n_wo_defaults | ||
else : | ||
n_wo_defaults = len(spec.args) - len(spec.defaults) | ||
defaults = ('',) * n_wo_defaults + spec.defaults | ||
args = [] | ||
for i, (var, default) in enumerate(zip(spec.args, defaults)) : | ||
args.append(var if default=='' else var+'='+repr(default)) | ||
return args, spec.args | ||
|
||
def _whitelist_method_generator(klass, whitelist):
    """
    Yield source for GroupBy wrappers of the whitelisted ``klass`` members.

    Parameters
    ----------
    klass : class
        Class where the members are defined. Should be Series or DataFrame.
    whitelist : list of str
        Names of ``klass`` attributes for which wrappers are generated.

    Returns
    -------
    The generator yields a sequence of strings, each suitable for exec'ing,
    that define implementations of the named members for DataFrameGroupBy
    or SeriesGroupBy.

    Since we don't want to override methods explicitly defined in the
    base class, any name already present on GroupBy is skipped.
    """
    import types

    # The doc text is pasted verbatim between triple quotes in the
    # generated source.
    # NOTE(review): a docstring containing triple quotes or backslashes
    # would presumably alter or break the generated code -- TODO confirm.
    method_wrapper_template = (
        'def %(name)s(%(sig)s) :\n'
        '    """\n'
        '    %(doc)s\n'
        '    """\n'
        "    f = %(self)s.__getattr__('%(name)s')\n"
        '    return f(%(args)s)\n'
    )
    property_wrapper_template = (
        '@property\n'
        'def %(name)s(self) :\n'
        '    """\n'
        '    %(doc)s\n'
        '    """\n'
        "    return self.__getattr__('%(name)s')\n"
    )
    # Looking a method up on the class gives an unbound method on Python 2
    # but a plain function on Python 3, so both must count as "method";
    # otherwise every method falls through to the property template.
    method_types = (types.MethodType, types.FunctionType)

    for name in whitelist:
        # don't override anything that was explicitly defined
        # in the base class
        if hasattr(GroupBy, name):
            continue
        # ugly, but we need the name string itself in the method.
        f = getattr(klass, name)
        doc = f.__doc__ if isinstance(f.__doc__, str) else ''
        if isinstance(f, method_types):
            decl, args = _make_signature(f)
            # pass args by name to f because otherwise
            # GroupBy._make_wrapper won't know whether
            # we passed in an axis parameter.
            args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
            yield method_wrapper_template % {
                'name': name,
                'doc': doc,
                'sig': ','.join(decl),
                # the first declared parameter is the receiver
                'self': args[0],
                'args': ','.join(args_by_name),
            }
        else:
            yield property_wrapper_template % {'name': name, 'doc': doc}
|
||
class SeriesGroupBy(GroupBy): | ||
# | ||
# Make class defs of attributes on SeriesGroupBy whitelist | ||
_apply_whitelist = _series_apply_whitelist | ||
for _def_str in _whitelist_method_generator(Series,_series_apply_whitelist) : | ||
exec(_def_str) | ||
|
||
def aggregate(self, func_or_funcs, *args, **kwargs): | ||
""" | ||
|
@@ -3044,6 +3135,10 @@ def filter(self, func, dropna=True, *args, **kwargs): | |
|
||
class DataFrameGroupBy(NDFrameGroupBy): | ||
_apply_whitelist = _dataframe_apply_whitelist | ||
# | ||
# Make class defs of attributes on DataFrameGroupBy whitelist. | ||
for _def_str in _whitelist_method_generator(DataFrame,_apply_whitelist) : | ||
exec(_def_str) | ||
|
||
_block_agg_axis = 1 | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4310,6 +4310,10 @@ def test_groupby_whitelist(self): | |
self.assertEqual(whitelist, gb._apply_whitelist) | ||
for m in whitelist: | ||
getattr(gb, m) | ||
# Also make sure that the class itself has | ||
# the method defined (dtypes is not a method) | ||
if m not in ['dtypes'] : | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this here? |
||
self.assertTrue(hasattr(type(gb), m)) | ||
|
||
def test_groupby_blacklist(self): | ||
from string import ascii_lowercase | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# pylint: disable-msg=W0612,E1101,W0141 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just stick the tests in tests/test_group.py rather than adding a new test file (you can do it in a new class, as you are doing here). |
||
import datetime | ||
import nose | ||
|
||
from numpy.random import randn | ||
import numpy as np | ||
|
||
from pandas.core.index import Index, MultiIndex | ||
from pandas import Panel, DataFrame, Series, notnull, isnull | ||
|
||
from pandas.util.testing import (assert_almost_equal, | ||
assert_series_equal, | ||
assert_frame_equal, | ||
assertRaisesRegexp) | ||
import pandas.core.common as com | ||
import pandas.util.testing as tm | ||
from pandas.compat import (range, lrange, StringIO, lzip, u, | ||
product as cart_product, zip) | ||
import pandas as pd | ||
|
||
import pandas.index as _index | ||
|
||
|
||
class TestNewGroupByAttr(tm.TestCase):
    """Compare whitelisted groupby aggregations with the frame-level call."""

    _multiprocess_can_split_ = True

    # Aggregations exercised by test_newattr.
    AGG_FUNCTIONS = ['skew', 'mad']

    def setUp(self):
        """Build a 10x3 random frame on a two-level index with a few NaNs."""
        import warnings
        warnings.filterwarnings(action='ignore', category=FutureWarning)

        level_values = [['foo', 'bar', 'baz', 'qux'],
                        ['one', 'two', 'three']]
        level_labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                        [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
        index = MultiIndex(levels=level_values, labels=level_labels,
                           names=['first', 'second'])
        columns = Index(['A', 'B', 'C'], name='exp')
        self.frame = DataFrame(np.random.randn(10, 3), index=index,
                               columns=columns)

        # Blank out a few cells so both skipna settings see missing data.
        self.frame.ix[1, [1, 2]] = np.nan
        self.frame.ix[7, [0, 1]] = np.nan

    def test_newattr(self):
        """Each op via groupby must equal the same op called with level=."""
        combos = cart_product(self.AGG_FUNCTIONS, lrange(2), lrange(2),
                              [True, False])
        for op, level, axis, skipna in combos:
            # For axis 1 the two-level index has to sit on the columns.
            frame = self.frame if axis == 0 else self.frame.T

            grouped = frame.groupby(level=level, axis=axis)
            result = getattr(grouped, op)(skipna=skipna)
            expected = getattr(frame, op)(level=level, axis=axis,
                                          skipna=skipna)
            assert_frame_equal(result, expected)
|
||
if __name__ == '__main__':
    # When executed directly, hand this module to nose's runner with the
    # options below; exit=False is passed through to runmodule.
    import nose
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you move this function inside `_whitelist_method_generator`? It's only called from there, yes? (And it's cleaner that way.)