Skip to content

Commit 53817ef

Browse files
committed
move NDFrame.groupby to (DataFrame|Series).groupby
1 parent 477b2d5 commit 53817ef

File tree

3 files changed

+168
-81
lines changed

3 files changed

+168
-81
lines changed

pandas/core/frame.py

+77-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
from pandas.core.dtypes.missing import isna, notna
9393

9494
from pandas._typing import Axes, Dtype, FilePathOrBuffer
95-
from pandas.core import algorithms, common as com, nanops, ops
95+
from pandas.core import algorithms, common as com, groupby, nanops, ops
9696
from pandas.core.accessor import CachedAccessor
9797
from pandas.core.arrays import Categorical, ExtensionArray
9898
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
@@ -5523,6 +5523,82 @@ def update(
55235523

55245524
# ----------------------------------------------------------------------
55255525
# Data reshaping
5526+
@Appender(
5527+
"""
5528+
Examples
5529+
--------
5530+
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
5531+
... 'Parrot', 'Parrot'],
5532+
... 'Max Speed': [380., 370., 24., 26.]})
5533+
>>> df
5534+
Animal Max Speed
5535+
0 Falcon 380.0
5536+
1 Falcon 370.0
5537+
2 Parrot 24.0
5538+
3 Parrot 26.0
5539+
>>> df.groupby(['Animal']).mean()
5540+
Max Speed
5541+
Animal
5542+
Falcon 375.0
5543+
Parrot 25.0
5544+
5545+
**Hierarchical Indexes**
5546+
5547+
We can group by different levels of a hierarchical index
5548+
using the `level` parameter:
5549+
5550+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
5551+
... ['Captive', 'Wild', 'Captive', 'Wild']]
5552+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
5553+
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
5554+
... index=index)
5555+
>>> df
5556+
Max Speed
5557+
Animal Type
5558+
Falcon Captive 390.0
5559+
Wild 350.0
5560+
Parrot Captive 30.0
5561+
Wild 20.0
5562+
>>> df.groupby(level=0).mean()
5563+
Max Speed
5564+
Animal
5565+
Falcon 370.0
5566+
Parrot 25.0
5567+
>>> df.groupby(level="Type").mean()
5568+
Max Speed
5569+
Type
5570+
Captive 210.0
5571+
Wild 185.0
5572+
"""
5573+
)
5574+
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
5575+
def groupby(
5576+
self,
5577+
by=None,
5578+
axis=0,
5579+
level=None,
5580+
as_index: bool = True,
5581+
sort: bool = True,
5582+
group_keys: bool = True,
5583+
squeeze: bool = False,
5584+
observed: bool = False,
5585+
) -> "groupby.DataFrameGroupBy":
5586+
5587+
if level is None and by is None:
5588+
raise TypeError("You have to supply one of 'by' and 'level'")
5589+
axis = self._get_axis_number(axis)
5590+
5591+
return groupby.DataFrameGroupBy(
5592+
obj=self,
5593+
keys=by,
5594+
axis=axis,
5595+
level=level,
5596+
as_index=as_index,
5597+
sort=sort,
5598+
group_keys=group_keys,
5599+
squeeze=squeeze,
5600+
observed=observed,
5601+
)
55265602

55275603
_shared_docs[
55285604
"pivot"

pandas/core/generic.py

+7-79
Original file line numberDiff line numberDiff line change
@@ -7282,19 +7282,10 @@ def clip(
72827282

72837283
return result
72847284

7285-
def groupby(
7286-
self,
7287-
by=None,
7288-
axis=0,
7289-
level=None,
7290-
as_index: bool_t = True,
7291-
sort: bool_t = True,
7292-
group_keys: bool_t = True,
7293-
squeeze: bool_t = False,
7294-
observed: bool_t = False,
7295-
):
7296-
"""
7297-
Group DataFrame or Series using a mapper or by a Series of columns.
7285+
_shared_docs[
7286+
"groupby"
7287+
] = """
7288+
Group %(klass)s using a mapper or by a Series of columns.
72987289
72997290
A groupby operation involves some combination of splitting the
73007291
object, applying a function, and combining the results. This can be
@@ -7339,9 +7330,8 @@ def groupby(
73397330
73407331
Returns
73417332
-------
7342-
DataFrameGroupBy or SeriesGroupBy
7343-
Depends on the calling object and returns groupby object that
7344-
contains information about the groups.
7333+
%(klass)sGroupBy
7334+
Returns a groupby object that contains information about the groups.
73457335
73467336
See Also
73477337
--------
@@ -7352,69 +7342,7 @@ def groupby(
73527342
-----
73537343
See the `user guide
73547344
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`_ for more.
7355-
7356-
Examples
7357-
--------
7358-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
7359-
... 'Parrot', 'Parrot'],
7360-
... 'Max Speed': [380., 370., 24., 26.]})
7361-
>>> df
7362-
Animal Max Speed
7363-
0 Falcon 380.0
7364-
1 Falcon 370.0
7365-
2 Parrot 24.0
7366-
3 Parrot 26.0
7367-
>>> df.groupby(['Animal']).mean()
7368-
Max Speed
7369-
Animal
7370-
Falcon 375.0
7371-
Parrot 25.0
7372-
7373-
**Hierarchical Indexes**
7374-
7375-
We can groupby different levels of a hierarchical index
7376-
using the `level` parameter:
7377-
7378-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
7379-
... ['Captive', 'Wild', 'Captive', 'Wild']]
7380-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
7381-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
7382-
... index=index)
7383-
>>> df
7384-
Max Speed
7385-
Animal Type
7386-
Falcon Captive 390.0
7387-
Wild 350.0
7388-
Parrot Captive 30.0
7389-
Wild 20.0
7390-
>>> df.groupby(level=0).mean()
7391-
Max Speed
7392-
Animal
7393-
Falcon 370.0
7394-
Parrot 25.0
7395-
>>> df.groupby(level=1).mean()
7396-
Max Speed
7397-
Type
7398-
Captive 210.0
7399-
Wild 185.0
7400-
"""
7401-
from pandas.core.groupby.groupby import get_groupby
7402-
7403-
if level is None and by is None:
7404-
raise TypeError("You have to supply one of 'by' and 'level'")
7405-
axis = self._get_axis_number(axis)
7406-
7407-
return get_groupby(
7408-
self,
7409-
by=by,
7410-
axis=axis,
7411-
level=level,
7412-
as_index=as_index,
7413-
sort=sort,
7414-
group_keys=group_keys,
7415-
squeeze=squeeze,
7416-
observed=observed,
7417-
)
7345+
"""
74187346

74197347
def asfreq(
74207348
self,

pandas/core/series.py

+84-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
)
4848

4949
import pandas as pd
50-
from pandas.core import algorithms, base, generic, nanops, ops
50+
from pandas.core import algorithms, base, generic, groupby, nanops, ops
5151
from pandas.core.accessor import CachedAccessor
5252
from pandas.core.arrays import ExtensionArray, try_cast_to_ea
5353
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
@@ -1565,6 +1565,89 @@ def _set_name(self, name, inplace=False):
15651565
ser.name = name
15661566
return ser
15671567

1568+
@Appender(
1569+
"""
1570+
Examples
1571+
--------
1572+
>>> ser = pd.Series([390., 350., 30., 20.],
1573+
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
1574+
>>> ser
1575+
Falcon 390.0
1576+
Falcon 350.0
1577+
Parrot 30.0
1578+
Parrot 20.0
1579+
Name: Max Speed, dtype: float64
1580+
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1581+
a 210.0
1582+
b 185.0
1583+
Name: Max Speed, dtype: float64
1584+
>>> ser.groupby(level=0).mean()
1585+
Falcon 370.0
1586+
Parrot 25.0
1587+
Name: Max Speed, dtype: float64
1588+
>>> ser.groupby(ser > 100).mean()
1589+
Max Speed
1590+
False 25.0
1591+
True 370.0
1592+
Name: Max Speed, dtype: float64
1593+
1594+
**Grouping by Indexes**
1595+
1596+
We can group by different levels of a hierarchical index
1597+
using the `level` parameter:
1598+
1599+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
1600+
... ['Captive', 'Wild', 'Captive', 'Wild']]
1601+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
1602+
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
1603+
>>> ser
1604+
Animal Type
1605+
Falcon Captive 390.0
1606+
Wild 350.0
1607+
Parrot Captive 30.0
1608+
Wild 20.0
1609+
Name: Max Speed, dtype: float64
1610+
>>> ser.groupby(level=0).mean()
1611+
Animal
1612+
Falcon 370.0
1613+
Parrot 25.0
1614+
Name: Max Speed, dtype: float64
1615+
>>> ser.groupby(level="Type").mean()
1616+
Type
1617+
Captive 210.0
1618+
Wild 185.0
1619+
Name: Max Speed, dtype: float64
1620+
"""
1621+
)
1622+
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
1623+
def groupby(
1624+
self,
1625+
by=None,
1626+
axis=0,
1627+
level=None,
1628+
as_index: bool = True,
1629+
sort: bool = True,
1630+
group_keys: bool = True,
1631+
squeeze: bool = False,
1632+
observed: bool = False,
1633+
) -> "groupby.SeriesGroupBy":
1634+
1635+
if level is None and by is None:
1636+
raise TypeError("You have to supply one of 'by' and 'level'")
1637+
axis = self._get_axis_number(axis)
1638+
1639+
return groupby.SeriesGroupBy(
1640+
obj=self,
1641+
keys=by,
1642+
axis=axis,
1643+
level=level,
1644+
as_index=as_index,
1645+
sort=sort,
1646+
group_keys=group_keys,
1647+
squeeze=squeeze,
1648+
observed=observed,
1649+
)
1650+
15681651
# ----------------------------------------------------------------------
15691652
# Statistics, overridden ndarray methods
15701653

0 commit comments

Comments
 (0)