Skip to content

Commit 7f276c8

Browse files
authored
DOC: Fixed examples in pandas/core/groupby/ (#33230)
1 parent 982b4aa commit 7f276c8

File tree

4 files changed

+65
-24
lines changed

4 files changed

+65
-24
lines changed

ci/code_checks.sh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -292,10 +292,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
292292
pytest -q --doctest-modules pandas/core/generic.py
293293
RET=$(($RET + $?)) ; echo $MSG "DONE"
294294

295-
MSG='Doctests groupby.py' ; echo $MSG
296-
pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
297-
RET=$(($RET + $?)) ; echo $MSG "DONE"
298-
299295
MSG='Doctests series.py' ; echo $MSG
300296
pytest -q --doctest-modules pandas/core/series.py
301297
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -318,6 +314,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
318314
pytest -q --doctest-modules pandas/core/dtypes/
319315
RET=$(($RET + $?)) ; echo $MSG "DONE"
320316

317+
MSG='Doctests groupby' ; echo $MSG
318+
pytest -q --doctest-modules pandas/core/groupby/
319+
RET=$(($RET + $?)) ; echo $MSG "DONE"
320+
321321
MSG='Doctests indexes' ; echo $MSG
322322
pytest -q --doctest-modules pandas/core/indexes/
323323
RET=$(($RET + $?)) ; echo $MSG "DONE"

pandas/core/groupby/generic.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -833,10 +833,13 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
833833
"""
834834
Examples
835835
--------
836-
837-
>>> df = pd.DataFrame({'A': [1, 1, 2, 2],
838-
... 'B': [1, 2, 3, 4],
839-
... 'C': np.random.randn(4)})
836+
>>> df = pd.DataFrame(
837+
... {
838+
... "A": [1, 1, 2, 2],
839+
... "B": [1, 2, 3, 4],
840+
... "C": [0.362838, 0.227877, 1.267767, -0.562860],
841+
... }
842+
... )
840843
841844
>>> df
842845
A B C
@@ -876,7 +879,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
876879
B C
877880
min max sum
878881
A
879-
1 1 2 0.590716
882+
1 1 2 0.590715
880883
2 3 4 0.704907
881884
882885
To control the output names with different aggregations per column,
@@ -887,8 +890,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
887890
... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
888891
b_min c_sum
889892
A
890-
1 1 -1.956929
891-
2 3 -0.322183
893+
1 1 0.590715
894+
2 3 0.704907
895+
892896
893897
- The keywords are the *output* column names
894898
- The values are tuples whose first element is the column to select

pandas/core/groupby/groupby.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -202,14 +202,14 @@ class providing the base-class of operations.
202202
functions that expect Series, DataFrames, GroupBy or Resampler objects.
203203
Instead of writing
204204
205-
>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)
205+
>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
206206
207207
You can write
208208
209209
>>> (df.groupby('group')
210210
... .pipe(f)
211211
... .pipe(g, arg1=a)
212-
... .pipe(h, arg2=b, arg3=c))
212+
... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP
213213
214214
which is much more readable.
215215
@@ -2017,7 +2017,9 @@ def cumcount(self, ascending: bool = True):
20172017
20182018
Essentially this is equivalent to
20192019
2020-
>>> self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))
2020+
.. code-block:: python
2021+
2022+
self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))
20212023
20222024
Parameters
20232025
----------

pandas/core/groupby/grouper.py

Lines changed: 45 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -79,16 +79,51 @@ class Grouper:
7979
--------
8080
Syntactic sugar for ``df.groupby('A')``
8181
82-
>>> df.groupby(Grouper(key='A'))
83-
84-
Specify a resample operation on the column 'date'
85-
86-
>>> df.groupby(Grouper(key='date', freq='60s'))
87-
88-
Specify a resample operation on the level 'date' on the columns axis
89-
with a frequency of 60s
90-
91-
>>> df.groupby(Grouper(level='date', freq='60s', axis=1))
82+
>>> df = pd.DataFrame(
83+
... {
84+
... "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"],
85+
... "Speed": [100, 5, 200, 300, 15],
86+
... }
87+
... )
88+
>>> df
89+
Animal Speed
90+
0 Falcon 100
91+
1 Parrot 5
92+
2 Falcon 200
93+
3 Falcon 300
94+
4 Parrot 15
95+
>>> df.groupby(pd.Grouper(key="Animal")).mean()
96+
Speed
97+
Animal
98+
Falcon 200
99+
Parrot 10
100+
101+
Specify a resample operation on the column 'Publish date'
102+
103+
>>> df = pd.DataFrame(
104+
... {
105+
... "Publish date": [
106+
... pd.Timestamp("2000-01-02"),
107+
... pd.Timestamp("2000-01-02"),
108+
... pd.Timestamp("2000-01-09"),
109+
... pd.Timestamp("2000-01-16")
110+
... ],
111+
... "ID": [0, 1, 2, 3],
112+
... "Price": [10, 20, 30, 40]
113+
... }
114+
... )
115+
>>> df
116+
Publish date ID Price
117+
0 2000-01-02 0 10
118+
1 2000-01-02 1 20
119+
2 2000-01-09 2 30
120+
3 2000-01-16 3 40
121+
>>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean()
122+
ID Price
123+
Publish date
124+
2000-01-02 0.5 15.0
125+
2000-01-09 2.0 30.0
126+
2000-01-16 3.0 40.0
92127
"""
93128

94129
_attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort")

0 commit comments

Comments
 (0)