Skip to content

Commit 68c7064

Browse files
committed
move NDFrame.groupby to (DataFrame|Series).groupby
1 parent 844dc4a commit 68c7064

File tree

3 files changed

+169
-81
lines changed

3 files changed

+169
-81
lines changed

pandas/core/frame.py

+78 -1
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,8 @@
9494
)
9595
from pandas.core.dtypes.missing import isna, notna
9696

97-
from pandas.core import algorithms, common as com, nanops, ops
97+
from pandas._typing import Axes, Dtype, FilePathOrBuffer
98+
from pandas.core import algorithms, common as com, groupby, nanops, ops
9899
from pandas.core.accessor import CachedAccessor
99100
from pandas.core.arrays import Categorical, ExtensionArray
100101
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
@@ -5598,6 +5599,82 @@ def update(
55985599

55995600
# ----------------------------------------------------------------------
56005601
# Data reshaping
5602+
@Appender(
5603+
"""
5604+
Examples
5605+
--------
5606+
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
5607+
... 'Parrot', 'Parrot'],
5608+
... 'Max Speed': [380., 370., 24., 26.]})
5609+
>>> df
5610+
Animal Max Speed
5611+
0 Falcon 380.0
5612+
1 Falcon 370.0
5613+
2 Parrot 24.0
5614+
3 Parrot 26.0
5615+
>>> df.groupby(['Animal']).mean()
5616+
Max Speed
5617+
Animal
5618+
Falcon 375.0
5619+
Parrot 25.0
5620+
5621+
**Hierarchical Indexes**
5622+
5623+
We can groupby different levels of a hierarchical index
5624+
using the `level` parameter:
5625+
5626+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
5627+
... ['Captive', 'Wild', 'Captive', 'Wild']]
5628+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
5629+
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
5630+
... index=index)
5631+
>>> df
5632+
Max Speed
5633+
Animal Type
5634+
Falcon Captive 390.0
5635+
Wild 350.0
5636+
Parrot Captive 30.0
5637+
Wild 20.0
5638+
>>> df.groupby(level=0).mean()
5639+
Max Speed
5640+
Animal
5641+
Falcon 370.0
5642+
Parrot 25.0
5643+
>>> df.groupby(level="Type").mean()
5644+
Max Speed
5645+
Type
5646+
Captive 210.0
5647+
Wild 185.0
5648+
"""
5649+
)
5650+
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
5651+
def groupby(
5652+
self,
5653+
by=None,
5654+
axis=0,
5655+
level=None,
5656+
as_index: bool = True,
5657+
sort: bool = True,
5658+
group_keys: bool = True,
5659+
squeeze: bool = False,
5660+
observed: bool = False,
5661+
) -> "groupby.DataFrameGroupBy":
5662+
5663+
# NOTE(review): there is no inline docstring here. The documentation appears
# to be attached by the two @Appender decorators above (the Examples text and
# the shared "groupby" template); confirm before adding a docstring, since it
# may change the rendered documentation.
#
# Guard: a grouping needs at least one of `by` / `level`. When both are left
# as None the call is rejected with TypeError before anything else runs.
if level is None and by is None:
5664+
raise TypeError("You have to supply one of 'by' and 'level'")
5665+
# Pass `axis` through self._get_axis_number before forwarding it
# (presumably this normalises an axis label to its integer position - confirm).
axis = self._get_axis_number(axis)
5666+
5667+
# Inside the method body `groupby` refers to the module imported at file
# level (`from pandas.core import ..., groupby, ...`), not to this method.
# The DataFrameGroupBy is constructed directly; `by` is forwarded under the
# constructor's `keys` keyword, every other argument keeps its own name.
return groupby.DataFrameGroupBy(
5668+
obj=self,
5669+
keys=by,
5670+
axis=axis,
5671+
level=level,
5672+
as_index=as_index,
5673+
sort=sort,
5674+
group_keys=group_keys,
5675+
squeeze=squeeze,
5676+
observed=observed,
5677+
)
56015678

56025679
_shared_docs[
56035680
"pivot"

pandas/core/generic.py

+7 -79
Original file line number | Diff line number | Diff line change
@@ -7273,19 +7273,10 @@ def clip(
72737273

72747274
return result
72757275

7276-
def groupby(
7277-
self,
7278-
by=None,
7279-
axis=0,
7280-
level=None,
7281-
as_index: bool_t = True,
7282-
sort: bool_t = True,
7283-
group_keys: bool_t = True,
7284-
squeeze: bool_t = False,
7285-
observed: bool_t = False,
7286-
):
7287-
"""
7288-
Group DataFrame or Series using a mapper or by a Series of columns.
7276+
_shared_docs[
7277+
"groupby"
7278+
] = """
7279+
Group %(klass)s using a mapper or by a Series of columns.
72897280
72907281
A groupby operation involves some combination of splitting the
72917282
object, applying a function, and combining the results. This can be
@@ -7330,9 +7321,8 @@ def groupby(
73307321
73317322
Returns
73327323
-------
7333-
DataFrameGroupBy or SeriesGroupBy
7334-
Depends on the calling object and returns groupby object that
7335-
contains information about the groups.
7324+
%(klass)sGroupBy
7325+
Returns a groupby object that contains information about the groups.
73367326
73377327
See Also
73387328
--------
@@ -7343,69 +7333,7 @@ def groupby(
73437333
-----
73447334
See the `user guide
73457335
<http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
7346-
7347-
Examples
7348-
--------
7349-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
7350-
... 'Parrot', 'Parrot'],
7351-
... 'Max Speed': [380., 370., 24., 26.]})
7352-
>>> df
7353-
Animal Max Speed
7354-
0 Falcon 380.0
7355-
1 Falcon 370.0
7356-
2 Parrot 24.0
7357-
3 Parrot 26.0
7358-
>>> df.groupby(['Animal']).mean()
7359-
Max Speed
7360-
Animal
7361-
Falcon 375.0
7362-
Parrot 25.0
7363-
7364-
**Hierarchical Indexes**
7365-
7366-
We can groupby different levels of a hierarchical index
7367-
using the `level` parameter:
7368-
7369-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
7370-
... ['Captive', 'Wild', 'Captive', 'Wild']]
7371-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
7372-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
7373-
... index=index)
7374-
>>> df
7375-
Max Speed
7376-
Animal Type
7377-
Falcon Captive 390.0
7378-
Wild 350.0
7379-
Parrot Captive 30.0
7380-
Wild 20.0
7381-
>>> df.groupby(level=0).mean()
7382-
Max Speed
7383-
Animal
7384-
Falcon 370.0
7385-
Parrot 25.0
7386-
>>> df.groupby(level=1).mean()
7387-
Max Speed
7388-
Type
7389-
Captive 210.0
7390-
Wild 185.0
7391-
"""
7392-
from pandas.core.groupby.groupby import get_groupby
7393-
7394-
if level is None and by is None:
7395-
raise TypeError("You have to supply one of 'by' and 'level'")
7396-
axis = self._get_axis_number(axis)
7397-
7398-
return get_groupby(
7399-
self,
7400-
by=by,
7401-
axis=axis,
7402-
level=level,
7403-
as_index=as_index,
7404-
sort=sort,
7405-
group_keys=group_keys,
7406-
squeeze=squeeze,
7407-
observed=observed,
7408-
)
7336+
"""
74097337

74107338
def asfreq(
74117339
self,

pandas/core/series.py

+84 -1
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
)
4848

4949
import pandas as pd
50-
from pandas.core import algorithms, base, generic, nanops, ops
50+
from pandas.core import algorithms, base, generic, groupby, nanops, ops
5151
from pandas.core.accessor import CachedAccessor
5252
from pandas.core.arrays import ExtensionArray, try_cast_to_ea
5353
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
@@ -1568,6 +1568,89 @@ def _set_name(self, name, inplace=False):
15681568
ser.name = name
15691569
return ser
15701570

1571+
@Appender(
1572+
"""
1573+
Examples
1574+
--------
1575+
>>> ser = pd.Series([390., 350., 30., 20.],
1576+
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
1577+
>>> ser
1578+
Falcon 390.0
1579+
Falcon 350.0
1580+
Parrot 30.0
1581+
Parrot 20.0
1582+
Name: Max Speed, dtype: float64
1583+
>>> ser.groupby(["a", "b", "a", "b"]).mean()
1584+
a 210.0
1585+
b 185.0
1586+
Name: Max Speed, dtype: float64
1587+
>>> ser.groupby(level=0).mean()
1588+
Falcon 370.0
1589+
Parrot 25.0
1590+
Name: Max Speed, dtype: float64
1591+
>>> ser.groupby(ser > 100).mean()
1592+
Max Speed
1593+
False 25.0
1594+
True 370.0
1595+
Name: Max Speed, dtype: float64
1596+
1597+
**Grouping by Indexes**
1598+
1599+
We can groupby different levels of a hierarchical index
1600+
using the `level` parameter:
1601+
1602+
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
1603+
... ['Captive', 'Wild', 'Captive', 'Wild']]
1604+
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
1605+
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
1606+
>>> ser
1607+
Animal Type
1608+
Falcon Captive 390.0
1609+
Wild 350.0
1610+
Parrot Captive 30.0
1611+
Wild 20.0
1612+
Name: Max Speed, dtype: float64
1613+
>>> ser.groupby(level=0).mean()
1614+
Animal
1615+
Falcon 370.0
1616+
Parrot 25.0
1617+
Name: Max Speed, dtype: float64
1618+
>>> ser.groupby(level="Type").mean()
1619+
Type
1620+
Captive 210.0
1621+
Wild 185.0
1622+
Name: Max Speed, dtype: float64
1623+
"""
1624+
)
1625+
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
1626+
def groupby(
1627+
self,
1628+
by=None,
1629+
axis=0,
1630+
level=None,
1631+
as_index: bool = True,
1632+
sort: bool = True,
1633+
group_keys: bool = True,
1634+
squeeze: bool = False,
1635+
observed: bool = False,
1636+
) -> "groupby.SeriesGroupBy":
1637+
1638+
# NOTE(review): there is no inline docstring here. The documentation appears
# to be attached by the two @Appender decorators above (the Examples text and
# generic._shared_docs["groupby"]); confirm before adding a docstring, since
# it may change the rendered documentation.
#
# Guard: a grouping needs at least one of `by` / `level`. When both are left
# as None the call is rejected with TypeError before anything else runs.
if level is None and by is None:
1639+
raise TypeError("You have to supply one of 'by' and 'level'")
1640+
# Pass `axis` through self._get_axis_number before forwarding it
# (presumably this normalises an axis label to its integer position - confirm).
axis = self._get_axis_number(axis)
1641+
1642+
# Inside the method body `groupby` refers to the module imported at file
# level (`from pandas.core import ..., groupby, ...`), not to this method.
# The SeriesGroupBy is constructed directly; `by` is forwarded under the
# constructor's `keys` keyword, every other argument keeps its own name.
return groupby.SeriesGroupBy(
1643+
obj=self,
1644+
keys=by,
1645+
axis=axis,
1646+
level=level,
1647+
as_index=as_index,
1648+
sort=sort,
1649+
group_keys=group_keys,
1650+
squeeze=squeeze,
1651+
observed=observed,
1652+
)
1653+
15711654
# ----------------------------------------------------------------------
15721655
# Statistics, overridden ndarray methods
15731656

0 commit comments

Comments
 (0)