Skip to content

Commit fdf3e8c

Browse files
committed
move NDFrame.groupby to (DataFrame|Series).groupby
1 parent 96bb151 commit fdf3e8c

File tree

3 files changed

+168
-81
lines changed

3 files changed

+168
-81
lines changed

pandas/core/frame.py

+77-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
from pandas.core.dtypes.missing import isna, notna
8888

8989
from pandas._typing import Axes, Dtype, FilePathOrBuffer
90-
from pandas.core import algorithms, common as com, nanops, ops
90+
from pandas.core import algorithms, common as com, groupby, nanops, ops
9191
from pandas.core.accessor import CachedAccessor
9292
from pandas.core.arrays import Categorical, ExtensionArray
9393
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
@@ -5532,6 +5532,82 @@ def update(
55325532

55335533
# ----------------------------------------------------------------------
55345534
# Data reshaping
5535+
@Appender(
5536+
"""
5537+
Examples
5538+
--------
5539+
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
5540+
... 'Parrot', 'Parrot'],
5541+
... 'Max Speed': [380., 370., 24., 26.]})
5542+
>>> df
5543+
Animal Max Speed
5544+
0 Falcon 380.0
5545+
1 Falcon 370.0
5546+
2 Parrot 24.0
5547+
3 Parrot 26.0
5548+
>>> df.groupby(['Animal']).mean()
5549+
Max Speed
5550+
Animal
5551+
Falcon 375.0
5552+
Parrot 25.0
5553+
5554+
**Hierarchical Indexes**
5555+
5556+
We can group by different levels of a hierarchical index
5557+
using the `level` parameter:
5558+
5559+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
5560+
... ['Captive', 'Wild', 'Captive', 'Wild']]
5561+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
5562+
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
5563+
... index=index)
5564+
>>> df
5565+
Max Speed
5566+
Animal Type
5567+
Falcon Captive 390.0
5568+
Wild 350.0
5569+
Parrot Captive 30.0
5570+
Wild 20.0
5571+
>>> df.groupby(level=0).mean()
5572+
Max Speed
5573+
Animal
5574+
Falcon 370.0
5575+
Parrot 25.0
5576+
>>> df.groupby(level="Type").mean()
5577+
Max Speed
5578+
Type
5579+
Captive 210.0
5580+
Wild 185.0
5581+
"""
5582+
)
5583+
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
5584+
def groupby(
    self,
    by=None,
    axis=0,
    level=None,
    as_index: bool = True,
    sort: bool = True,
    group_keys: bool = True,
    squeeze: bool = False,
    observed: bool = False,
) -> "groupby.DataFrameGroupBy":
    # Intentionally no inline docstring: the ``Appender`` decorators stacked
    # on this method supply its documentation.

    # At least one of ``by`` / ``level`` is required. Reject the call before
    # ``axis`` is resolved so that this is the first error reported.
    if by is None and level is None:
        raise TypeError("You have to supply one of 'by' and 'level'")

    axis_number = self._get_axis_number(axis)

    # Inside the method body ``groupby`` resolves to the module imported at
    # file level (``from pandas.core import ..., groupby, ...``), not to this
    # method, because class scope is not visible from a function body.
    return groupby.DataFrameGroupBy(
        obj=self,
        keys=by,
        axis=axis_number,
        level=level,
        as_index=as_index,
        sort=sort,
        group_keys=group_keys,
        squeeze=squeeze,
        observed=observed,
    )
55355611

55365612
_shared_docs[
55375613
"pivot"

pandas/core/generic.py

+7-79
Original file line numberDiff line numberDiff line change
@@ -7343,19 +7343,10 @@ def clip(
73437343

73447344
return result
73457345

7346-
def groupby(
7347-
self,
7348-
by=None,
7349-
axis=0,
7350-
level=None,
7351-
as_index: bool_t = True,
7352-
sort: bool_t = True,
7353-
group_keys: bool_t = True,
7354-
squeeze: bool_t = False,
7355-
observed: bool_t = False,
7356-
):
7357-
"""
7358-
Group DataFrame or Series using a mapper or by a Series of columns.
7346+
_shared_docs[
7347+
"groupby"
7348+
] = """
7349+
Group %(klass)s using a mapper or by a Series of columns.
73597350
73607351
A groupby operation involves some combination of splitting the
73617352
object, applying a function, and combining the results. This can be
@@ -7400,9 +7391,8 @@ def groupby(
74007391
74017392
Returns
74027393
-------
7403-
DataFrameGroupBy or SeriesGroupBy
7404-
Depends on the calling object and returns groupby object that
7405-
contains information about the groups.
7394+
%(klass)sGroupBy
7395+
Returns a groupby object that contains information about the groups.
74067396
74077397
See Also
74087398
--------
@@ -7413,69 +7403,7 @@ def groupby(
74137403
-----
74147404
See the `user guide
74157405
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`_ for more.
7416-
7417-
Examples
7418-
--------
7419-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
7420-
... 'Parrot', 'Parrot'],
7421-
... 'Max Speed': [380., 370., 24., 26.]})
7422-
>>> df
7423-
Animal Max Speed
7424-
0 Falcon 380.0
7425-
1 Falcon 370.0
7426-
2 Parrot 24.0
7427-
3 Parrot 26.0
7428-
>>> df.groupby(['Animal']).mean()
7429-
Max Speed
7430-
Animal
7431-
Falcon 375.0
7432-
Parrot 25.0
7433-
7434-
**Hierarchical Indexes**
7435-
7436-
We can groupby different levels of a hierarchical index
7437-
using the `level` parameter:
7438-
7439-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
7440-
... ['Captive', 'Wild', 'Captive', 'Wild']]
7441-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
7442-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
7443-
... index=index)
7444-
>>> df
7445-
Max Speed
7446-
Animal Type
7447-
Falcon Captive 390.0
7448-
Wild 350.0
7449-
Parrot Captive 30.0
7450-
Wild 20.0
7451-
>>> df.groupby(level=0).mean()
7452-
Max Speed
7453-
Animal
7454-
Falcon 370.0
7455-
Parrot 25.0
7456-
>>> df.groupby(level=1).mean()
7457-
Max Speed
7458-
Type
7459-
Captive 210.0
7460-
Wild 185.0
7461-
"""
7462-
from pandas.core.groupby.groupby import get_groupby
7463-
7464-
if level is None and by is None:
7465-
raise TypeError("You have to supply one of 'by' and 'level'")
7466-
axis = self._get_axis_number(axis)
7467-
7468-
return get_groupby(
7469-
self,
7470-
by=by,
7471-
axis=axis,
7472-
level=level,
7473-
as_index=as_index,
7474-
sort=sort,
7475-
group_keys=group_keys,
7476-
squeeze=squeeze,
7477-
observed=observed,
7478-
)
7406+
"""
74797407

74807408
def asfreq(
74817409
self,

pandas/core/series.py

+84-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
)
4848

4949
import pandas as pd
50-
from pandas.core import algorithms, base, generic, nanops, ops
50+
from pandas.core import algorithms, base, generic, groupby, nanops, ops
5151
from pandas.core.accessor import CachedAccessor
5252
from pandas.core.arrays import ExtensionArray, try_cast_to_ea
5353
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
@@ -1565,6 +1565,89 @@ def _set_name(self, name, inplace=False):
15651565
ser.name = name
15661566
return ser
15671567

1568+
@Appender(
1569+
"""
1570+
Examples
1571+
--------
1572+
>>> ser = pd.Series([390., 350., 30., 20.],
1573+
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
1574+
>>> ser
1575+
Falcon 390.0
1576+
Falcon 350.0
1577+
Parrot 30.0
1578+
Parrot 20.0
1579+
Name: Max Speed, dtype: float64
1580+
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1581+
a 210.0
1582+
b 185.0
1583+
Name: Max Speed, dtype: float64
1584+
>>> ser.groupby(level=0).mean()
1585+
Falcon 370.0
1586+
Parrot 25.0
1587+
Name: Max Speed, dtype: float64
1588+
>>> ser.groupby(ser > 100).mean()
1589+
Max Speed
1590+
False 25.0
1591+
True 370.0
1592+
Name: Max Speed, dtype: float64
1593+
1594+
**Grouping by Indexes**
1595+
1596+
We can group by different levels of a hierarchical index
1597+
using the `level` parameter:
1598+
1599+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
1600+
... ['Captive', 'Wild', 'Captive', 'Wild']]
1601+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
1602+
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
1603+
>>> ser
1604+
Animal Type
1605+
Falcon Captive 390.0
1606+
Wild 350.0
1607+
Parrot Captive 30.0
1608+
Wild 20.0
1609+
Name: Max Speed, dtype: float64
1610+
>>> ser.groupby(level=0).mean()
1611+
Animal
1612+
Falcon 370.0
1613+
Parrot 25.0
1614+
Name: Max Speed, dtype: float64
1615+
>>> ser.groupby(level="Type").mean()
1616+
Type
1617+
Captive 210.0
1618+
Wild 185.0
1619+
Name: Max Speed, dtype: float64
1620+
"""
1621+
)
1622+
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
1623+
def groupby(
    self,
    by=None,
    axis=0,
    level=None,
    as_index: bool = True,
    sort: bool = True,
    group_keys: bool = True,
    squeeze: bool = False,
    observed: bool = False,
) -> "groupby.SeriesGroupBy":
    # Intentionally no inline docstring: the ``Appender`` decorators stacked
    # on this method supply its documentation.

    # At least one of ``by`` / ``level`` is required. Reject the call before
    # ``axis`` is resolved so that this is the first error reported.
    if by is None and level is None:
        raise TypeError("You have to supply one of 'by' and 'level'")

    axis_number = self._get_axis_number(axis)

    # Inside the method body ``groupby`` resolves to the module imported at
    # file level (``from pandas.core import ..., groupby, ...``), not to this
    # method, because class scope is not visible from a function body.
    return groupby.SeriesGroupBy(
        obj=self,
        keys=by,
        axis=axis_number,
        level=level,
        as_index=as_index,
        sort=sort,
        group_keys=group_keys,
        squeeze=squeeze,
        observed=observed,
    )
1650+
15681651
# ----------------------------------------------------------------------
15691652
# Statistics, overridden ndarray methods
15701653

0 commit comments

Comments
 (0)