@@ -160,6 +160,33 @@ def test_agg_apply_corner(ts, tsframe):
160
160
tm .assert_frame_equal (res , exp_df )
161
161
162
162
163
def test_with_na_groups(any_real_numpy_dtype):
    """Aggregate a Series grouped by labels that contain NaN.

    Ten ones are grouped by labels holding four "bar", two "foo" and four
    NaN entries. Only "bar" and "foo" are expected in the aggregated result,
    with sizes 4 and 2 respectively.

    Parameters
    ----------
    any_real_numpy_dtype
        dtype passed to the ``Series`` holding the values being grouped.
    """
    idx = Index(np.arange(10))
    ones = Series(np.ones(10), idx, dtype=any_real_numpy_dtype)
    keys = Series(
        [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"],
        index=idx,
    )
    grouped = ones.groupby(keys)

    # Group sizes via ``len``. The result SHOULD be an integer dtype, but that
    # is not asserted here, so the comparison ignores the dtype.
    sizes = grouped.agg(len)
    tm.assert_series_equal(
        sizes, Series([4, 2], index=["bar", "foo"]), check_dtype=False
    )

    # explicitly return a float from the aggregation function
    def float_len(group):
        return float(len(group))

    float_sizes = grouped.agg(float_len)
    tm.assert_series_equal(float_sizes, Series([4.0, 2.0], index=["bar", "foo"]))
188
+
189
+
163
190
def test_agg_grouping_is_list_tuple (ts ):
164
191
df = DataFrame (
165
192
np .random .default_rng (2 ).standard_normal ((30 , 4 )),
@@ -1049,6 +1076,73 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
1049
1076
tm .assert_frame_equal (result , expected )
1050
1077
1051
1078
1079
def test_groupby_as_index_agg(df):
    """Compare ``agg`` with the equivalent direct reductions on groupbys.

    Covers single-key and multi-key groupings with ``as_index=False``, the
    dict-renamer behaviour of ``SeriesGroupBy.agg`` (an error with
    ``as_index=True``, a ``FutureWarning`` in the multi-key ``as_index=False``
    case), and the GH7115 / GH8112 / GH8582 comparison between grouping by a
    named Series and by its raw values.

    Parameters
    ----------
    df
        Fixture frame; the test groups it by columns "A" and "B" and
        aggregates columns "C" and "D".
    """
    # single-key
    by_a = df.groupby("A", as_index=False)

    mean_cd = by_a[["C", "D"]].agg("mean")
    tm.assert_frame_equal(mean_cd, by_a.mean(numeric_only=True))

    mixed = by_a.agg({"C": "mean", "D": "sum"})
    mixed_expected = by_a.mean(numeric_only=True)
    mixed_expected["D"] = by_a.sum()["D"]
    tm.assert_frame_equal(mixed, mixed_expected)

    # a renaming dict on a single selected column must raise with as_index=True
    by_a_indexed = df.groupby("A", as_index=True)
    with pytest.raises(SpecificationError, match=r"nested renamer is not supported"):
        by_a_indexed["C"].agg({"Q": "sum"})

    # multi-key
    by_ab = df.groupby(["A", "B"], as_index=False)

    tm.assert_frame_equal(by_ab.agg("mean"), by_ab.mean())

    mixed = by_ab.agg({"C": "mean", "D": "sum"})
    mixed_expected = by_ab.mean()
    mixed_expected["D"] = by_ab.sum()["D"]
    tm.assert_frame_equal(mixed, mixed_expected)

    # the same renaming dict only warns here, and the result carries the new name
    renamed_expected = DataFrame(by_ab["C"].sum()).rename(columns={"C": "Q"})
    dict_msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=dict_msg):
        renamed = by_ab["C"].agg({"Q": "sum"})
    tm.assert_frame_equal(renamed, renamed_expected)

    # GH7115 & GH8112 & GH8582
    frame = DataFrame(
        np.random.default_rng(2).integers(0, 100, (50, 3)),
        columns=["jim", "joe", "jolie"],
    )
    # the grouping key shares its name with one of the frame's columns
    keys = Series(np.random.default_rng(2).integers(5, 10, 50), name="jim")

    gb = frame.groupby(keys)
    gb.nth(0)  # invokes set_selection_from_grouper internally

    # a groupby that already ran ``nth`` must apply the same as a fresh one
    sum_msg = "The behavior of DataFrame.sum with axis=None is deprecated"
    with tm.assert_produces_warning(
        FutureWarning, match=sum_msg, check_stacklevel=False
    ):
        applied = gb.apply(sum)
    with tm.assert_produces_warning(
        FutureWarning, match=sum_msg, check_stacklevel=False
    ):
        applied_fresh = frame.groupby(keys).apply(sum)
    tm.assert_frame_equal(applied, applied_fresh)

    for reduction in ("mean", "max", "count", "idxmax", "cumsum", "all"):
        left = getattr(frame.groupby(keys, as_index=False), reduction)()
        right = getattr(frame.groupby(keys.values, as_index=True), reduction)()
        tm.assert_frame_equal(left, right.reset_index(drop=True))
1144
+
1145
+
1052
1146
@pytest .mark .parametrize (
1053
1147
"func" , [lambda s : s .mean (), lambda s : np .mean (s ), lambda s : np .nanmean (s )]
1054
1148
)
@@ -1252,6 +1346,28 @@ def test_agg_multiple_lambda(self):
1252
1346
tm .assert_frame_equal (result2 , expected )
1253
1347
1254
1348
1349
def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
    """Forward positional and keyword arguments through ``agg`` with duplicate columns.

    The frame's columns are relabelled so that "A" appears twice, the rows
    are grouped by the month of their index value, and ``np.percentile`` is
    applied with ``80`` and ``axis=0`` forwarded by ``agg``. The result is
    compared with the per-month 0.8 quantiles for months 1 and 2.

    Parameters
    ----------
    tsframe
        Fixture frame; the test assigns it four column labels and reads
        ``.month`` from its index.
    as_index
        Passed to ``groupby``. When falsy, a ``FutureWarning`` is expected and
        the expected result uses a ``range(2)`` index.
    """
    # go through _aggregate_frame with self.axis == 0 and duplicate columns
    tsframe.columns = ["A", "B", "A", "C"]
    by_month = tsframe.groupby(lambda stamp: stamp.month, as_index=as_index)

    expected_warning = FutureWarning if not as_index else None
    with tm.assert_produces_warning(
        expected_warning, match="A grouping .* was excluded from the result"
    ):
        result = by_month.agg(np.percentile, 80, axis=0)

    month_of_row = tsframe.index.month
    per_month = {
        month: tsframe[month_of_row == month].quantile(0.8) for month in (1, 2)
    }
    expected = DataFrame(per_month).T
    if not as_index:
        # TODO: try to get this more consistent?
        expected.index = Index(range(2))

    tm.assert_frame_equal(result, expected)
1369
+
1370
+
1255
1371
def test_groupby_get_by_index ():
1256
1372
# GH 33439
1257
1373
df = DataFrame ({"A" : ["S" , "W" , "W" ], "B" : [1.0 , 1.0 , 2.0 ]})
0 commit comments