DOC/CLN: move NDFrame.groupby to (DataFrame|Series).groupby #30314

Merged 3 commits on Jan 1, 2020
77 changes: 77 additions & 0 deletions pandas/core/frame.py
@@ -100,6 +100,7 @@
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.groupby import generic as groupby_generic
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -5598,6 +5599,82 @@ def update(

# ----------------------------------------------------------------------
# Data reshaping
@Appender(
"""
Examples
--------
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
... 'Parrot', 'Parrot'],
... 'Max Speed': [380., 370., 24., 26.]})
>>> df
Animal Max Speed
0 Falcon 380.0
1 Falcon 370.0
2 Parrot 24.0
3 Parrot 26.0
>>> df.groupby(['Animal']).mean()
Max Speed
Animal
Falcon 375.0
Parrot 25.0

**Hierarchical Indexes**

We can groupby different levels of a hierarchical index
using the `level` parameter:

>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
... ['Captive', 'Wild', 'Captive', 'Wild']]
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
... index=index)
>>> df
Max Speed
Animal Type
Falcon Captive 390.0
Wild 350.0
Parrot Captive 30.0
Wild 20.0
>>> df.groupby(level=0).mean()
Max Speed
Animal
Falcon 370.0
Parrot 25.0
>>> df.groupby(level="Type").mean()
Max Speed
Type
Captive 210.0
Wild 185.0
"""
)
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
def groupby(
self,
by=None,
axis=0,
level=None,
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = False,
observed: bool = False,
) -> "groupby_generic.DataFrameGroupBy":

if level is None and by is None:
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)

return groupby_generic.DataFrameGroupBy(
obj=self,
keys=by,
axis=axis,
level=level,
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
)

_shared_docs[
"pivot"
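The frame.py hunk above gives DataFrame its own groupby override, annotated to return DataFrameGroupBy and documented by two stacked @Appender decorators: the inner one interpolates the shared template from _shared_docs, the outer one appends the DataFrame-specific Examples block. Below is a minimal sketch of that docstring-composition pattern, using a toy append_doc helper rather than pandas' real pandas.util._decorators.Appender:

```python
def append_doc(addendum: str):
    """Toy stand-in for pandas' Appender decorator: tack text onto __doc__."""
    def decorate(func):
        func.__doc__ = (func.__doc__ or "") + addendum
        return func
    return decorate


_shared = "Group %(klass)s using a mapper or by a Series of columns.\n"
_examples = "\nExamples\n--------\n>>> df.groupby(['Animal']).mean()\n"


@append_doc(_examples)                          # applied second: appends Examples
@append_doc(_shared % {"klass": "DataFrame"})   # applied first: shared template
def groupby(self, by=None, level=None):
    ...


print(groupby.__doc__)
# Group DataFrame using a mapper or by a Series of columns.
#
# Examples
# --------
# >>> df.groupby(['Animal']).mean()
```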
86 changes: 7 additions & 79 deletions pandas/core/generic.py
@@ -7273,19 +7273,10 @@ def clip(

return result

def groupby(
self,
by=None,
axis=0,
level=None,
as_index: bool_t = True,
sort: bool_t = True,
group_keys: bool_t = True,
squeeze: bool_t = False,
observed: bool_t = False,
):
"""
Group DataFrame or Series using a mapper or by a Series of columns.
_shared_docs[
"groupby"
] = """
Group %(klass)s using a mapper or by a Series of columns.

A groupby operation involves some combination of splitting the
object, applying a function, and combining the results. This can be
@@ -7330,9 +7321,8 @@

Returns
-------
DataFrameGroupBy or SeriesGroupBy
Depends on the calling object and returns groupby object that
contains information about the groups.
%(klass)sGroupBy
Returns a groupby object that contains information about the groups.

See Also
--------
@@ -7343,69 +7333,7 @@
-----
See the `user guide
<http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.

Examples
--------
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
... 'Parrot', 'Parrot'],
... 'Max Speed': [380., 370., 24., 26.]})
>>> df
Animal Max Speed
0 Falcon 380.0
1 Falcon 370.0
2 Parrot 24.0
3 Parrot 26.0
>>> df.groupby(['Animal']).mean()
Max Speed
Animal
Falcon 375.0
Parrot 25.0

**Hierarchical Indexes**

We can groupby different levels of a hierarchical index
using the `level` parameter:

>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
... ['Captive', 'Wild', 'Captive', 'Wild']]
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
... index=index)
>>> df
Max Speed
Animal Type
Falcon Captive 390.0
Wild 350.0
Parrot Captive 30.0
Wild 20.0
>>> df.groupby(level=0).mean()
Max Speed
Animal
Falcon 370.0
Parrot 25.0
>>> df.groupby(level=1).mean()
Max Speed
Type
Captive 210.0
Wild 185.0
"""
from pandas.core.groupby.groupby import get_groupby

if level is None and by is None:
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)

return get_groupby(
self,
by=by,
axis=axis,
level=level,
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
)
"""

def asfreq(
self,
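In generic.py the old NDFrame.groupby body and docstring are gone; what remains is a shared template stored under _shared_docs["groupby"], with %(klass)s placeholders that DataFrame and Series fill in via old-style string interpolation when decorating their overrides. Here is a small illustration of that substitution step, with stand-in values for the module-level dictionaries (hypothetical, for demonstration only):

```python
# Stand-ins for the pandas module-level dictionaries of the same names.
_shared_docs = {
    "groupby": (
        "Group %(klass)s using a mapper or by a Series of columns.\n"
        "\n"
        "Returns\n"
        "-------\n"
        "%(klass)sGroupBy\n"
        "    Returns a groupby object that contains information about the groups.\n"
    )
}
_shared_doc_kwargs = {"klass": "DataFrame"}  # series.py would use "Series"

print(_shared_docs["groupby"] % _shared_doc_kwargs)
# Group DataFrame using a mapper or by a Series of columns.
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
```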
4 changes: 3 additions & 1 deletion pandas/core/reshape/merge.py
@@ -41,6 +41,7 @@
from pandas.core.dtypes.missing import isna, na_value_for_dtype

from pandas import Categorical, Index, MultiIndex
from pandas.core import groupby
import pandas.core.algorithms as algos
from pandas.core.arrays.categorical import _recode_for_categories
import pandas.core.common as com
@@ -113,6 +114,7 @@ def _groupby_and_merge(
by = [by]

lby = left.groupby(by, sort=False)
rby: Optional[groupby.DataFrameGroupBy] = None

# if we can groupby the rhs
# then we can get vastly better perf
@@ -132,7 +134,7 @@
try:
rby = right.groupby(by, sort=False)
except KeyError:
rby = None
pass

for key, lhs in lby:

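Because DataFrame.groupby is now annotated, merge.py can declare `rby` as Optional[groupby.DataFrameGroupBy] before the try/except and let the handler simply `pass`, keeping the variable's type visible to static checkers rather than inferred from an assignment inside the `except` branch. A rough sketch of the same pattern outside pandas internals (the frame and key below are made up for illustration, and DataFrameGroupBy is imported from an internal module):

```python
from typing import Optional

import pandas as pd
from pandas.core.groupby.generic import DataFrameGroupBy

right = pd.DataFrame({"key": ["a", "a", "b"], "rval": [1, 2, 3]})
by = ["key"]

# Annotate up front so a checker sees Optional[DataFrameGroupBy] from the start.
rby: Optional[DataFrameGroupBy] = None
try:
    rby = right.groupby(by, sort=False)
except KeyError:
    pass  # `by` is not a column of `right`; fall back to the un-grouped path

if rby is not None:
    print(rby.size())
```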
87 changes: 85 additions & 2 deletions pandas/core/series.py
@@ -59,7 +59,7 @@
is_empty_data,
sanitize_array,
)
from pandas.core.generic import _shared_docs
from pandas.core.groupby import generic as groupby_generic
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
from pandas.core.indexes.api import (
@@ -1431,7 +1431,7 @@ def to_string(
"""
)
@Substitution(klass="Series")
@Appender(_shared_docs["to_markdown"])
@Appender(generic._shared_docs["to_markdown"])
def to_markdown(
self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
) -> Optional[str]:
@@ -1568,6 +1568,89 @@ def _set_name(self, name, inplace=False):
ser.name = name
return ser

@Appender(
"""
Examples
--------
>>> ser = pd.Series([390., 350., 30., 20.],
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
>>> ser
Falcon 390.0
Falcon 350.0
Parrot 30.0
Parrot 20.0
Name: Max Speed, dtype: float64
>>> ser.groupby(["a", "b", "a", "b"]).mean()
a 210.0
b 185.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level=0).mean()
Falcon 370.0
Parrot 25.0
Name: Max Speed, dtype: float64
>>> ser.groupby(ser > 100).mean()
Max Speed
False 25.0
True 370.0
Name: Max Speed, dtype: float64

**Grouping by Indexes**

We can groupby different levels of a hierarchical index
using the `level` parameter:

>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
... ['Captive', 'Wild', 'Captive', 'Wild']]
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
>>> ser
Animal Type
Falcon Captive 390.0
Wild 350.0
Parrot Captive 30.0
Wild 20.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level=0).mean()
Animal
Falcon 370.0
Parrot 25.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level="Type").mean()
Type
Captive 210.0
Wild 185.0
Name: Max Speed, dtype: float64
"""
)
@Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
def groupby(
self,
by=None,
axis=0,
level=None,
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = False,
observed: bool = False,
) -> "groupby_generic.SeriesGroupBy":

if level is None and by is None:
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)

return groupby_generic.SeriesGroupBy(
obj=self,
keys=by,
axis=axis,
level=level,
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
)

# ----------------------------------------------------------------------
# Statistics, overridden ndarray methods

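As a quick check that the Series side behaves like the new docstring examples (this snippet is not part of the diff), grouping a Series goes through the override above and yields a SeriesGroupBy directly:

```python
import pandas as pd

ser = pd.Series(
    [390.0, 350.0, 30.0, 20.0],
    index=["Falcon", "Falcon", "Parrot", "Parrot"],
    name="Max Speed",
)

grouped = ser.groupby(level=0)
print(type(grouped).__name__)  # SeriesGroupBy
print(grouped.mean())
# Falcon    370.0
# Parrot     25.0
# Name: Max Speed, dtype: float64
```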