From c01b4ca4bffda39f02f4d178fbb02081017f802b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Mar 2021 16:17:39 -0700 Subject: [PATCH 1/2] TYP: fix ignores in core.groupby --- pandas/core/groupby/generic.py | 17 ++++++++--------- pandas/core/groupby/groupby.py | 7 +++---- pandas/core/groupby/ops.py | 23 ++++++++++------------- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b407212fe6a50..74b79e82d033f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -356,7 +356,7 @@ def _aggregate_multiple_funcs(self, arg): # TODO: index should not be Optional - see GH 35490 def _wrap_series_output( self, - output: Mapping[base.OutputKey, Union[Series, np.ndarray]], + output: Mapping[base.OutputKey, Union[Series, ArrayLike]], index: Optional[Index], ) -> FrameOrSeriesUnion: """ @@ -364,7 +364,7 @@ def _wrap_series_output( Parameters ---------- - output : Mapping[base.OutputKey, Union[Series, np.ndarray]] + output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] Data to wrap. index : pd.Index or None Index to apply to the output. @@ -421,14 +421,14 @@ def _wrap_aggregated_output( return self._reindex_output(result) def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + self, output: Mapping[base.OutputKey, Union[Series, ArrayLike]] ) -> Series: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. Parameters ---------- - output : dict[base.OutputKey, Union[Series, np.ndarray]] + output : dict[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] Dict with a sole key of 0 and a value of the result values. Returns @@ -1121,6 +1121,7 @@ def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: if isinstance(values, Categorical) and isinstance(result, np.ndarray): # If the Categorical op didn't raise, it is dtype-preserving + # We get here with how="first", "last", "min", "max" result = type(values)._from_sequence(result.ravel(), dtype=values.dtype) # Note this will have result.dtype == dtype from above @@ -1197,9 +1198,7 @@ def array_func(values: ArrayLike) -> ArrayLike: assert how == "ohlc" raise - # error: Incompatible types in assignment (expression has type - # "ExtensionArray", variable has type "ndarray") - result = py_fallback(values) # type: ignore[assignment] + result = py_fallback(values) return cast_agg_result(result, values, how) @@ -1755,14 +1754,14 @@ def _wrap_aggregated_output( return self._reindex_output(result) def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + self, output: Mapping[base.OutputKey, Union[Series, ArrayLike]] ) -> DataFrame: """ Wraps the output of DataFrameGroupBy transformations into the expected result. Parameters ---------- - output : Mapping[base.OutputKey, Union[Series, np.ndarray]] + output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] Data to wrap. Returns diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f33833193e4e0..51f41fb789258 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -84,7 +84,6 @@ class providing the base-class of operations. 
import pandas.core.algorithms as algorithms from pandas.core.arrays import ( Categorical, - DatetimeArray, ExtensionArray, ) from pandas.core.base import ( @@ -1026,7 +1025,7 @@ def _cumcount_array(self, ascending: bool = True): def _cython_transform( self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs ): - output: Dict[base.OutputKey, np.ndarray] = {} + output: Dict[base.OutputKey, ArrayLike] = {} for idx, obj in enumerate(self._iterate_slices()): name = obj.name @@ -1054,7 +1053,7 @@ def _wrap_aggregated_output( ): raise AbstractMethodError(self) - def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]): + def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]): raise AbstractMethodError(self) def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = False): @@ -1099,7 +1098,7 @@ def _agg_general( def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 ): - output: Dict[base.OutputKey, Union[np.ndarray, DatetimeArray]] = {} + output: Dict[base.OutputKey, ArrayLike] = {} # Ideally we would be able to enumerate self._iterate_slices and use # the index from enumeration as the key of output, but ohlc in particular # returns a (n x 4) array. Output requires 1D ndarrays as values, so we diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index c5d36d1588a5f..156b7d9df461d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -31,6 +31,7 @@ import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction from pandas._typing import ( + ArrayLike, DtypeObj, F, FrameOrSeries, @@ -525,7 +526,7 @@ def _disallow_invalid_ops( @final def _ea_wrap_cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs - ) -> np.ndarray: + ) -> ArrayLike: """ If we have an ExtensionArray, unwrap, call _cython_operation, and re-wrap if appropriate. @@ -577,7 +578,7 @@ def _ea_wrap_cython_operation( @final def _cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs - ) -> np.ndarray: + ) -> ArrayLike: """ Returns the values of a cython operation. """ @@ -684,11 +685,11 @@ def _cython_operation( # e.g. 
if we are int64 and need to restore to datetime64/timedelta64 # "rank" is the only member of cython_cast_blocklist we get here dtype = maybe_cast_result_dtype(orig_values.dtype, how) - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray]", variable has type "ndarray") - result = maybe_downcast_to_dtype(result, dtype) # type: ignore[assignment] + op_result = maybe_downcast_to_dtype(result, dtype) + else: + op_result = result - return result + return op_result @final def _aggregate( @@ -785,14 +786,10 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): counts[label] = group.shape[0] result[label] = res - result = lib.maybe_convert_objects(result, try_float=False) - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray]", variable has type "ndarray") - result = maybe_cast_result( # type: ignore[assignment] - result, obj, numeric_only=True - ) + out = lib.maybe_convert_objects(result, try_float=False) + out = maybe_cast_result(result, obj, numeric_only=True) - return result, counts + return out, counts class BinGrouper(BaseGrouper): From 54010723512f94a88c107d9eef4a0f1be13bbfa7 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Mar 2021 17:51:41 -0700 Subject: [PATCH 2/2] typo fixup --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 156b7d9df461d..467c9948cd9ae 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -787,7 +787,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): result[label] = res out = lib.maybe_convert_objects(result, try_float=False) - out = maybe_cast_result(result, obj, numeric_only=True) + out = maybe_cast_result(out, obj, numeric_only=True) return out, counts
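
Not part of the patches themselves, but for reference: the `# type: ignore[assignment]` comments in ops.py and generic.py could be dropped because the changes bind the possibly-widened value to a fresh name (`op_result`, `out`) rather than reassigning a variable mypy has already narrowed to `ndarray`. Below is a minimal sketch of that pattern under stated assumptions: the helper names (`narrow_op`, `widening_op`, `compute`) are made up for illustration, and the local `ArrayLike` alias only mirrors `pandas._typing.ArrayLike`; nothing here is pandas' actual implementation.

from typing import Union

import numpy as np

from pandas.api.extensions import ExtensionArray

# Simplified stand-in for pandas._typing.ArrayLike
ArrayLike = Union[np.ndarray, ExtensionArray]


def narrow_op(values: np.ndarray) -> np.ndarray:
    # stand-in for the cython aggregation path, which always yields an ndarray
    return values * 2


def widening_op(values: np.ndarray) -> ArrayLike:
    # stand-in for maybe_downcast_to_dtype / maybe_cast_result, which may hand
    # back either an ndarray or an ExtensionArray
    return values


def compute(values: np.ndarray) -> ArrayLike:
    result = narrow_op(values)  # mypy infers result: np.ndarray
    # Pre-patch style: result = widening_op(result)
    #   -> "Incompatible types in assignment", silenced with type: ignore[assignment]
    out = widening_op(result)  # fresh name, inferred as ArrayLike
    return out

The second patch then fixes a slip introduced by this renaming in `_aggregate_series_pure_python`: the cast has to be applied to the freshly bound `out`, not to the original `result`, or the `maybe_convert_objects` step is silently discarded.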