Skip to content

Commit c5b1659

Browse files
committed
ENH: added groups attribute back and implemented GroupBy.__len__, addressing GH #99 and GH #95
1 parent ba4957a commit c5b1659

File tree

2 files changed

+55
-19
lines changed

2 files changed

+55
-19
lines changed

pandas/core/groupby.py

+15-2
Original file line number | Diff line number | Diff line change
@@ -48,12 +48,25 @@ def __init__(self, obj, grouper=None, axis=0, level=None,
4848
self.groupings = groupings
4949
self.exclusions = set(exclusions)
5050

51+
def __len__(self):
52+
return len(self.groups)
53+
54+
_groups = None
5155
@property
5256
def groups(self):
57+
if self._groups is not None:
58+
return self._groups
59+
5360
if len(self.groupings) == 1:
54-
return self.primary.groups
61+
self._groups = self.primary.groups
5562
else:
56-
raise NotImplementedError
63+
to_groupby = zip(*(ping.grouper for ping in self.groupings))
64+
to_groupby = Index(to_groupby)
65+
66+
axis = self.obj._get_axis(self.axis)
67+
self._groups = _tseries.groupby(axis, to_groupby)
68+
69+
return self._groups
5770

5871
@property
5972
def name(self):

pandas/tests/test_groupby.py

+40-17
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717

1818
import pandas.util.testing as tm
1919

20-
# unittest.TestCase
21-
2220
def commonSetUp(self):
2321
self.dateRange = DateRange('1/1/2005', periods=250, offset=dt.bday)
2422
self.stringIndex = Index([rands(8).upper() for x in xrange(250)])
@@ -36,19 +34,6 @@ def commonSetUp(self):
3634
self.timeMatrix = DataFrame(randMat, columns=self.columnIndex,
3735
index=self.dateRange)
3836

39-
40-
class GroupByTestCase(unittest.TestCase):
41-
setUp = commonSetUp
42-
43-
def test_python_grouper(self):
44-
groupFunc = self.groupDict.get
45-
groups = groupby(self.stringIndex, groupFunc)
46-
setDict = dict((k, set(v)) for k, v in groups.iteritems())
47-
for idx in self.stringIndex:
48-
key = groupFunc(idx)
49-
groupSet = setDict[key]
50-
assert(idx in groupSet)
51-
5237
class TestGroupBy(unittest.TestCase):
5338

5439
def setUp(self):
@@ -122,6 +107,33 @@ def test_series_agg_corner(self):
122107
result = self.ts.groupby(self.ts * np.nan).sum()
123108
assert_series_equal(result, Series([]))
124109

110+
def test_len(self):
111+
df = tm.makeTimeDataFrame()
112+
grouped = df.groupby([lambda x: x.year,
113+
lambda x: x.month,
114+
lambda x: x.day])
115+
self.assertEquals(len(grouped), len(df))
116+
117+
grouped = df.groupby([lambda x: x.year,
118+
lambda x: x.month])
119+
expected = len(set([(x.year, x.month) for x in df.index]))
120+
self.assertEquals(len(grouped), expected)
121+
122+
def test_groups(self):
123+
grouped = self.df.groupby(['A'])
124+
groups = grouped.groups
125+
self.assert_(groups is grouped.groups) # caching works
126+
127+
for k, v in grouped.groups.iteritems():
128+
self.assert_((self.df.ix[v]['A'] == k).all())
129+
130+
grouped = self.df.groupby(['A', 'B'])
131+
groups = grouped.groups
132+
self.assert_(groups is grouped.groups) # caching works
133+
for k, v in grouped.groups.iteritems():
134+
self.assert_((self.df.ix[v]['A'] == k[0]).all())
135+
self.assert_((self.df.ix[v]['B'] == k[1]).all())
136+
125137
def test_aggregate_str_func(self):
126138
def _check_results(grouped):
127139
# single series
@@ -221,10 +233,21 @@ def test_attr_wrapper(self):
221233
self.assertRaises(AttributeError, getattr, grouped, 'foo')
222234

223235
def test_series_describe_multikey(self):
224-
raise nose.SkipTest
225236
ts = tm.makeTimeSeries()
226237
grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
227-
grouped.describe()
238+
result = grouped.describe()
239+
assert_series_equal(result['mean'], grouped.mean())
240+
assert_series_equal(result['std'], grouped.std())
241+
assert_series_equal(result['min'], grouped.min())
242+
243+
def test_frame_describe_multikey(self):
244+
grouped = self.tsframe.groupby([lambda x: x.year,
245+
lambda x: x.month])
246+
result = grouped.describe()
247+
248+
for col, ts in self.tsframe.iteritems():
249+
expected = grouped[col].describe()
250+
assert_series_equal(result['A'].unstack(), expected)
228251

229252
def test_frame_groupby(self):
230253
grouped = self.tsframe.groupby(lambda x: x.weekday())

0 commit comments

Comments
 (0)