@@ -1260,6 +1260,7 @@ def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray:
1260
1260
return self ._get_cythonized_result (
1261
1261
"group_any_all" ,
1262
1262
aggregate = True ,
1263
+ numeric_only = False ,
1263
1264
cython_dtype = np .dtype (np .uint8 ),
1264
1265
needs_values = True ,
1265
1266
needs_mask = True ,
@@ -1416,18 +1417,16 @@ def std(self, ddof: int = 1):
1416
1417
Series or DataFrame
1417
1418
Standard deviation of values within each group.
1418
1419
"""
1419
- result = self .var (ddof = ddof )
1420
- if result .ndim == 1 :
1421
- result = np .sqrt (result )
1422
- else :
1423
- cols = result .columns .get_indexer_for (
1424
- result .columns .difference (self .exclusions ).unique ()
1425
- )
1426
- # TODO(GH-22046) - setting with iloc broken if labels are not unique
1427
- # .values to remove labels
1428
- result .iloc [:, cols ] = np .sqrt (result .iloc [:, cols ]).values
1429
-
1430
- return result
1420
+ return self ._get_cythonized_result (
1421
+ "group_var_float64" ,
1422
+ aggregate = True ,
1423
+ needs_counts = True ,
1424
+ needs_values = True ,
1425
+ needs_2d = True ,
1426
+ cython_dtype = np .dtype (np .float64 ),
1427
+ post_processing = lambda vals , inference : np .sqrt (vals ),
1428
+ ddof = ddof ,
1429
+ )
1431
1430
1432
1431
@Substitution (name = "groupby" )
1433
1432
@Appender (_common_see_also )
@@ -1756,6 +1755,7 @@ def _fill(self, direction, limit=None):
1756
1755
1757
1756
return self ._get_cythonized_result (
1758
1757
"group_fillna_indexer" ,
1758
+ numeric_only = False ,
1759
1759
needs_mask = True ,
1760
1760
cython_dtype = np .dtype (np .int64 ),
1761
1761
result_is_index = True ,
@@ -2039,9 +2039,6 @@ def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
2039
2039
inference = "datetime64[ns]"
2040
2040
vals = np .asarray (vals ).astype (np .float )
2041
2041
2042
- if vals .dtype != np .dtype (np .float64 ):
2043
- vals = vals .astype (np .float64 )
2044
-
2045
2042
return vals , inference
2046
2043
2047
2044
def post_processor (vals : np .ndarray , inference : Optional [Type ]) -> np .ndarray :
@@ -2059,6 +2056,7 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
2059
2056
return self ._get_cythonized_result (
2060
2057
"group_quantile" ,
2061
2058
aggregate = True ,
2059
+ numeric_only = False ,
2062
2060
needs_values = True ,
2063
2061
needs_mask = True ,
2064
2062
cython_dtype = np .dtype (np .float64 ),
@@ -2348,7 +2346,11 @@ def _get_cythonized_result(
2348
2346
how : str ,
2349
2347
cython_dtype : np .dtype ,
2350
2348
aggregate : bool = False ,
2349
+ numeric_only : bool = True ,
2350
+ needs_counts : bool = False ,
2351
2351
needs_values : bool = False ,
2352
+ needs_2d : bool = False ,
2353
+ min_count : Optional [int ] = None ,
2352
2354
needs_mask : bool = False ,
2353
2355
needs_ngroups : bool = False ,
2354
2356
result_is_index : bool = False ,
@@ -2367,9 +2369,18 @@ def _get_cythonized_result(
2367
2369
aggregate : bool, default False
2368
2370
Whether the result should be aggregated to match the number of
2369
2371
groups
2372
+ numeric_only : bool, default True
2373
+ Whether only numeric datatypes should be computed
2374
+ needs_counts : bool, default False
2375
+ Whether the counts should be a part of the Cython call
2370
2376
needs_values : bool, default False
2371
2377
Whether the values should be a part of the Cython call
2372
2378
signature
2379
+ needs_2d : bool, default False
2380
+ Whether the values and result of the Cython call signature
2381
+ are 2-dimensional.
2382
+ min_count : int, default None
2383
+ When not None, min_count for the Cython call
2373
2384
needs_mask : bool, default False
2374
2385
Whether boolean mask needs to be part of the Cython call
2375
2386
signature
@@ -2415,56 +2426,44 @@ def _get_cythonized_result(
2415
2426
output : Dict [base .OutputKey , np .ndarray ] = {}
2416
2427
base_func = getattr (libgroupby , how )
2417
2428
2418
- if how == "group_quantile" :
2419
- values = self ._obj_with_exclusions ._values
2420
- result_sz = ngroups if aggregate else len (values )
2421
-
2422
- vals , inferences = pre_processing (values )
2423
- if self ._obj_with_exclusions .ndim == 1 :
2424
- width = 1
2425
- vals = np .reshape (vals , (- 1 , 1 ))
2426
- else :
2427
- width = len (self ._obj_with_exclusions .columns )
2428
- result = np .zeros ((result_sz , width ), dtype = cython_dtype )
2429
- counts = np .zeros (self .ngroups , dtype = np .int64 )
2430
- mask = isna (vals ).view (np .uint8 )
2431
-
2432
- func = partial (base_func , result , counts , vals , labels , - 1 , mask )
2433
- func (** kwargs ) # Call func to modify indexer values in place
2434
- result = post_processing (result , inferences )
2435
-
2436
- if self ._obj_with_exclusions .ndim == 1 :
2437
- key = base .OutputKey (label = self ._obj_with_exclusions .name , position = 0 )
2438
- output [key ] = result [:, 0 ]
2439
- else :
2440
- for idx , name in enumerate (self ._obj_with_exclusions .columns ):
2441
- key = base .OutputKey (label = name , position = idx )
2442
- output [key ] = result [:, idx ]
2443
-
2444
- if aggregate :
2445
- return self ._wrap_aggregated_output (output )
2446
- else :
2447
- return self ._wrap_transformed_output (output )
2448
-
2449
2429
for idx , obj in enumerate (self ._iterate_slices ()):
2450
2430
name = obj .name
2451
2431
values = obj ._values
2452
2432
2433
+ if numeric_only and not is_numeric_dtype (values ):
2434
+ continue
2435
+
2453
2436
if aggregate :
2454
2437
result_sz = ngroups
2455
2438
else :
2456
2439
result_sz = len (values )
2457
2440
2458
- result = np .zeros (result_sz , dtype = cython_dtype )
2459
- func = partial (base_func , result , labels )
2441
+ if needs_2d :
2442
+ result = np .zeros ((result_sz , 1 ), dtype = cython_dtype )
2443
+ else :
2444
+ result = np .zeros (result_sz , dtype = cython_dtype )
2445
+ func = partial (base_func , result )
2446
+
2460
2447
inferences = None
2461
2448
2449
+ if needs_counts :
2450
+ counts = np .zeros (self .ngroups , dtype = np .int64 )
2451
+ func = partial (func , counts )
2452
+
2462
2453
if needs_values :
2463
2454
vals = values
2464
2455
if pre_processing :
2465
2456
vals , inferences = pre_processing (vals )
2457
+ if needs_2d :
2458
+ vals = vals .reshape ((- 1 , 1 ))
2459
+ vals = vals .astype (cython_dtype , copy = False )
2466
2460
func = partial (func , vals )
2467
2461
2462
+ func = partial (func , labels )
2463
+
2464
+ if min_count is not None :
2465
+ func = partial (func , min_count )
2466
+
2468
2467
if needs_mask :
2469
2468
mask = isna (values ).view (np .uint8 )
2470
2469
func = partial (func , mask )
@@ -2474,6 +2473,9 @@ def _get_cythonized_result(
2474
2473
2475
2474
func (** kwargs ) # Call func to modify indexer values in place
2476
2475
2476
+ if needs_2d :
2477
+ result = result .reshape (- 1 )
2478
+
2477
2479
if result_is_index :
2478
2480
result = algorithms .take_nd (values , result )
2479
2481
@@ -2524,6 +2526,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
2524
2526
2525
2527
return self ._get_cythonized_result (
2526
2528
"group_shift_indexer" ,
2529
+ numeric_only = False ,
2527
2530
cython_dtype = np .dtype (np .int64 ),
2528
2531
needs_ngroups = True ,
2529
2532
result_is_index = True ,
0 commit comments