Skip to content

Commit 4b7c2c9

Browse files
committed
move NDFrame.groupby to (DataFrame|Series).groupby
1 parent c4e5566 commit 4b7c2c9

File tree

3 files changed

+168
-81
lines changed

3 files changed

+168
-81
lines changed

pandas/core/frame.py

+77 -1
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@
8787
from pandas.core.dtypes.missing import isna, notna
8888

8989
from pandas._typing import Axes, Dtype, FilePathOrBuffer
90-
from pandas.core import algorithms, common as com, nanops, ops
90+
from pandas.core import algorithms, common as com, groupby, nanops, ops
9191
from pandas.core.accessor import CachedAccessor
9292
from pandas.core.arrays import Categorical, ExtensionArray
9393
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
@@ -5512,6 +5512,82 @@ def update(
55125512

55135513
# ----------------------------------------------------------------------
55145514
# Data reshaping
5515+
@Appender(
5516+
"""
5517+
Examples
5518+
--------
5519+
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
5520+
... 'Parrot', 'Parrot'],
5521+
... 'Max Speed': [380., 370., 24., 26.]})
5522+
>>> df
5523+
Animal Max Speed
5524+
0 Falcon 380.0
5525+
1 Falcon 370.0
5526+
2 Parrot 24.0
5527+
3 Parrot 26.0
5528+
>>> df.groupby(['Animal']).mean()
5529+
Max Speed
5530+
Animal
5531+
Falcon 375.0
5532+
Parrot 25.0
5533+
5534+
**Hierarchical Indexes**
5535+
5536+
We can groupby different levels of a hierarchical index
5537+
using the `level` parameter:
5538+
5539+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
5540+
... ['Captive', 'Wild', 'Captive', 'Wild']]
5541+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
5542+
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
5543+
... index=index)
5544+
>>> df
5545+
Max Speed
5546+
Animal Type
5547+
Falcon Captive 390.0
5548+
Wild 350.0
5549+
Parrot Captive 30.0
5550+
Wild 20.0
5551+
>>> df.groupby(level=0).mean()
5552+
Max Speed
5553+
Animal
5554+
Falcon 370.0
5555+
Parrot 25.0
5556+
>>> df.groupby(level="Type").mean()
5557+
Max Speed
5558+
Type
5559+
Captive 210.0
5560+
Wild 185.0
5561+
"""
5562+
)
5563+
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
5564+
def groupby(
5565+
self,
5566+
by=None,
5567+
axis=0,
5568+
level=None,
5569+
as_index: bool = True,
5570+
sort: bool = True,
5571+
group_keys: bool = True,
5572+
squeeze: bool = False,
5573+
observed: bool = False,
5574+
) -> "groupby.DataFrameGroupBy":
5575+
5576+
if level is None and by is None:
5577+
raise TypeError("You have to supply one of 'by' and 'level'")
5578+
axis = self._get_axis_number(axis)
5579+
5580+
return groupby.DataFrameGroupBy(
5581+
obj=self,
5582+
keys=by,
5583+
axis=axis,
5584+
level=level,
5585+
as_index=as_index,
5586+
sort=sort,
5587+
group_keys=group_keys,
5588+
squeeze=squeeze,
5589+
observed=observed,
5590+
)
55155591

55165592
_shared_docs[
55175593
"pivot"

pandas/core/generic.py

+7 -79
Original file line number | Diff line number | Diff line change
@@ -7299,19 +7299,10 @@ def clip(
72997299

73007300
return result
73017301

7302-
def groupby(
7303-
self,
7304-
by=None,
7305-
axis=0,
7306-
level=None,
7307-
as_index: bool_t = True,
7308-
sort: bool_t = True,
7309-
group_keys: bool_t = True,
7310-
squeeze: bool_t = False,
7311-
observed: bool_t = False,
7312-
):
7313-
"""
7314-
Group DataFrame or Series using a mapper or by a Series of columns.
7302+
_shared_docs[
7303+
"groupby"
7304+
] = """
7305+
Group %(klass)s using a mapper or by a Series of columns.
73157306
73167307
A groupby operation involves some combination of splitting the
73177308
object, applying a function, and combining the results. This can be
@@ -7356,9 +7347,8 @@ def groupby(
73567347
73577348
Returns
73587349
-------
7359-
DataFrameGroupBy or SeriesGroupBy
7360-
Depends on the calling object and returns groupby object that
7361-
contains information about the groups.
7350+
%(klass)sGroupBy
7351+
Returns a groupby object that contains information about the groups.
73627352
73637353
See Also
73647354
--------
@@ -7369,69 +7359,7 @@ def groupby(
73697359
-----
73707360
See the `user guide
73717361
<http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
7372-
7373-
Examples
7374-
--------
7375-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
7376-
... 'Parrot', 'Parrot'],
7377-
... 'Max Speed': [380., 370., 24., 26.]})
7378-
>>> df
7379-
Animal Max Speed
7380-
0 Falcon 380.0
7381-
1 Falcon 370.0
7382-
2 Parrot 24.0
7383-
3 Parrot 26.0
7384-
>>> df.groupby(['Animal']).mean()
7385-
Max Speed
7386-
Animal
7387-
Falcon 375.0
7388-
Parrot 25.0
7389-
7390-
**Hierarchical Indexes**
7391-
7392-
We can groupby different levels of a hierarchical index
7393-
using the `level` parameter:
7394-
7395-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
7396-
... ['Captive', 'Wild', 'Captive', 'Wild']]
7397-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
7398-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
7399-
... index=index)
7400-
>>> df
7401-
Max Speed
7402-
Animal Type
7403-
Falcon Captive 390.0
7404-
Wild 350.0
7405-
Parrot Captive 30.0
7406-
Wild 20.0
7407-
>>> df.groupby(level=0).mean()
7408-
Max Speed
7409-
Animal
7410-
Falcon 370.0
7411-
Parrot 25.0
7412-
>>> df.groupby(level=1).mean()
7413-
Max Speed
7414-
Type
7415-
Captive 210.0
7416-
Wild 185.0
7417-
"""
7418-
from pandas.core.groupby.groupby import get_groupby
7419-
7420-
if level is None and by is None:
7421-
raise TypeError("You have to supply one of 'by' and 'level'")
7422-
axis = self._get_axis_number(axis)
7423-
7424-
return get_groupby(
7425-
self,
7426-
by=by,
7427-
axis=axis,
7428-
level=level,
7429-
as_index=as_index,
7430-
sort=sort,
7431-
group_keys=group_keys,
7432-
squeeze=squeeze,
7433-
observed=observed,
7434-
)
7362+
"""
74357363

74367364
def asfreq(
74377365
self,

pandas/core/series.py

+84 -1
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
)
4848

4949
import pandas as pd
50-
from pandas.core import algorithms, base, generic, nanops, ops
50+
from pandas.core import algorithms, base, generic, groupby, nanops, ops
5151
from pandas.core.accessor import CachedAccessor
5252
from pandas.core.arrays import ExtensionArray, try_cast_to_ea
5353
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
@@ -1565,6 +1565,89 @@ def _set_name(self, name, inplace=False):
15651565
ser.name = name
15661566
return ser
15671567

1568+
@Appender(
1569+
"""
1570+
Examples
1571+
--------
1572+
>>> ser = pd.Series([390., 350., 30., 20.],
1573+
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
1574+
>>> ser
1575+
Falcon 390.0
1576+
Falcon 350.0
1577+
Parrot 30.0
1578+
Parrot 20.0
1579+
Name: Max Speed, dtype: float64
1580+
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1581+
a 210.0
1582+
b 185.0
1583+
Name: Max Speed, dtype: float64
1584+
>>> ser.groupby(level=0).mean()
1585+
Falcon 370.0
1586+
Parrot 25.0
1587+
Name: Max Speed, dtype: float64
1588+
>>> ser.groupby(ser > 100).mean()
1589+
Max Speed
1590+
False 25.0
1591+
True 370.0
1592+
Name: Max Speed, dtype: float64
1593+
1594+
**Grouping by Indexes**
1595+
1596+
We can groupby different levels of a hierarchical index
1597+
using the `level` parameter:
1598+
1599+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
1600+
... ['Captive', 'Wild', 'Captive', 'Wild']]
1601+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
1602+
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
1603+
>>> ser
1604+
Animal Type
1605+
Falcon Captive 390.0
1606+
Wild 350.0
1607+
Parrot Captive 30.0
1608+
Wild 20.0
1609+
Name: Max Speed, dtype: float64
1610+
>>> ser.groupby(level=0).mean()
1611+
Animal
1612+
Falcon 370.0
1613+
Parrot 25.0
1614+
Name: Max Speed, dtype: float64
1615+
>>> ser.groupby(level="Type").mean()
1616+
Type
1617+
Captive 210.0
1618+
Wild 185.0
1619+
Name: Max Speed, dtype: float64
1620+
"""
1621+
)
1622+
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
1623+
def groupby(
1624+
self,
1625+
by=None,
1626+
axis=0,
1627+
level=None,
1628+
as_index: bool = True,
1629+
sort: bool = True,
1630+
group_keys: bool = True,
1631+
squeeze: bool = False,
1632+
observed: bool = False,
1633+
) -> "groupby.SeriesGroupBy":
1634+
1635+
if level is None and by is None:
1636+
raise TypeError("You have to supply one of 'by' and 'level'")
1637+
axis = self._get_axis_number(axis)
1638+
1639+
return groupby.SeriesGroupBy(
1640+
obj=self,
1641+
keys=by,
1642+
axis=axis,
1643+
level=level,
1644+
as_index=as_index,
1645+
sort=sort,
1646+
group_keys=group_keys,
1647+
squeeze=squeeze,
1648+
observed=observed,
1649+
)
1650+
15681651
# ----------------------------------------------------------------------
15691652
# Statistics, overridden ndarray methods
15701653

0 commit comments

Comments
 (0)