From d1fd5c3e3ad1a510900dbabe0056b5e15b50ec25 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 15 May 2022 17:58:13 +0800 Subject: [PATCH 01/12] rolling type --- pandas/core/window/rolling.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9617e27cc2e4b..4ad3e258ee94c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -365,7 +365,10 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray: if isinstance(values, ExtensionArray): values = values.to_numpy(np.float64, na_value=np.nan) else: - values = ensure_float64(values) + if np.iscomplex(values).all(): + values = values.astype(np.complex64) + else: + values = ensure_float64(values) except (ValueError, TypeError) as err: raise TypeError(f"cannot handle this type -> {values.dtype}") from err From 2736431e43a53ece389060f8a0d6f780046e0838 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 15 May 2022 19:01:52 +0800 Subject: [PATCH 02/12] add empty cases --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 4ad3e258ee94c..239812e3ad2ff 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -365,7 +365,7 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray: if isinstance(values, ExtensionArray): values = values.to_numpy(np.float64, na_value=np.nan) else: - if np.iscomplex(values).all(): + if len(values) != 0 and np.iscomplex(values).all(): values = values.astype(np.complex64) else: values = ensure_float64(values) From cdc4c6ed80ea5a9695fc70a06cbe5b2f1399ca3a Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 15 May 2022 19:46:07 +0800 Subject: [PATCH 03/12] add test --- pandas/tests/window/test_rolling.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 9c46a7194c9c5..173a7ab3a1527 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1860,3 +1860,13 @@ def test_rolling_mean_sum_floating_artifacts(): assert (result[-3:] == 0).all() result = r.sum() assert (result[-3:] == 0).all() + + +def test_rolling_imaginary_part_of_complex(): + # GH 46619 + + df = DataFrame([1j, 1 + 2j]) + result = df.rolling(2).apply(lambda x: print(x) is None) + expected = DataFrame([np.nan, 1.0]) + + tm.assert_frame_equal(result, expected) From ac9fd046a7ed2fd3152faa32bf40cddf969b76ac Mon Sep 17 00:00:00 2001 From: weikhor Date: Mon, 16 May 2022 23:12:35 +0800 Subject: [PATCH 04/12] use aggregation --- pandas/core/window/rolling.py | 3 ++- pandas/tests/window/test_rolling.py | 17 +++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 239812e3ad2ff..6a3170e9968d3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -39,6 +39,7 @@ from pandas.core.dtypes.common import ( ensure_float64, is_bool, + is_complex_dtype, is_integer, is_list_like, is_scalar, @@ -365,7 +366,7 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray: if isinstance(values, ExtensionArray): values = values.to_numpy(np.float64, na_value=np.nan) else: - if len(values) != 0 and np.iscomplex(values).all(): + if is_complex_dtype(values): values = values.astype(np.complex64) else: values = ensure_float64(values) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 59701bba8d8d1..9953238a5a0a2 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1873,11 +1873,16 @@ def test_rolling_skew_kurt_floating_artifacts(): assert (result[-2:] == -3).all() -def test_rolling_imaginary_part_of_complex(): +def test_rolling_imaginary_part_of_complex(arithmetic_win_operators): # GH 46619 - + func_name = arithmetic_win_operators df = DataFrame([1j, 1 + 2j]) - result = df.rolling(2).apply(lambda x: print(x) is None) - expected = DataFrame([np.nan, 1.0]) - - tm.assert_frame_equal(result, expected) + result = getattr( + df.rolling(2).apply(lambda x: print(x) is None), + func_name, + )() + expected = getattr( + DataFrame([np.nan, 1.0]), + func_name, + )() + tm.assert_series_equal(result, expected) From 098dc16f687fde911805ad6deabd57e81a73cef3 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 15:59:13 +0800 Subject: [PATCH 05/12] test rolling --- pandas/core/window/rolling.py | 3 +++ pandas/tests/window/test_rolling.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6a3170e9968d3..93e2f8ba6b6af 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -608,6 +608,9 @@ def calc(x): ) self._check_window_bounds(start, end, len(x)) + if is_complex_dtype(x): + x = ensure_float64(x) + return func(x, start, end, min_periods, *numba_args) with np.errstate(all="ignore"): diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 9953238a5a0a2..49eefa66f802b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1878,11 +1878,11 @@ def test_rolling_imaginary_part_of_complex(arithmetic_win_operators): func_name = arithmetic_win_operators df = DataFrame([1j, 1 + 2j]) result = getattr( - df.rolling(2).apply(lambda x: print(x) is None), + df.rolling(2), func_name, )() expected = getattr( - DataFrame([np.nan, 1.0]), + DataFrame([0, 1]).rolling(2), func_name, )() - tm.assert_series_equal(result, expected) + tm.assert_frame_equal(result, expected) From dc9cd7e8a68ddb0ca18268786c61710085b1e79f Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 19:29:06 +0800 Subject: [PATCH 06/12] count --- pandas/_libs/window/aggregations.pyx | 34 ++++++++++++++++++++-------- pandas/core/window/rolling.py | 3 --- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 68c05f2bb2c98..f24779427b219 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -14,6 +14,8 @@ import numpy as np cimport numpy as cnp from numpy cimport ( + complex64_t, + complex128_t, float32_t, float64_t, int64_t, @@ -26,6 +28,11 @@ from pandas._libs.algos import is_monotonic from pandas._libs.dtypes cimport numeric_t +from pandas.core.dtypes.common import ensure_float64 + +ctypedef fused float_complex_t: + float64_t + complex64_t cdef extern from "../src/skiplist.h": ctypedef struct node_t: @@ -129,9 +136,10 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, sum_x[0] = t -def roll_sum(const float64_t[:] values, ndarray[int64_t] start, +def roll_sum(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: + float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j float64_t sum_x, compensation_add, compensation_remove, prev_value int64_t s, e, num_consecutive_same_value @@ -234,7 +242,6 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, num_consecutive_same_value[0] = 1 prev_value[0] = val - cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct, float64_t *compensation) nogil: """ remove a value from the mean calc using Kahan summation """ @@ -251,9 +258,10 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(const float64_t[:] values, ndarray[int64_t] start, +def roll_mean(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: + float64_t[:] values = ensure_float64(float_complex_values) float64_t val, compensation_add, compensation_remove, sum_x, prev_value int64_t s, e, num_consecutive_same_value Py_ssize_t nobs, i, j, neg_ct, N = len(start) @@ -387,12 +395,13 @@ cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = 0 -def roll_var(const float64_t[:] values, ndarray[int64_t] start, +def roll_var(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int ddof=1) -> np.ndarray: """ Numerically stable implementation using Welford's method. """ cdef: + float64_t[:] values = ensure_float64(float_complex_values) float64_t mean_x, ssqdm_x, nobs, compensation_add, float64_t compensation_remove, prev_value int64_t s, e, num_consecutive_same_value @@ -562,9 +571,10 @@ cdef inline void remove_skew(float64_t val, int64_t *nobs, xxx[0] = t -def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, +def roll_skew(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: + float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j float64_t val, prev, min_val, mean_val, sum_val = 0 float64_t compensation_xxx_add, compensation_xxx_remove @@ -775,9 +785,10 @@ cdef inline void remove_kurt(float64_t val, int64_t *nobs, xxxx[0] = t -def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, +def roll_kurt(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: + float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j float64_t val, prev, mean_val, min_val, sum_val = 0 float64_t compensation_xxxx_add, compensation_xxxx_remove @@ -869,9 +880,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, # Rolling median, min, max -def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, +def roll_median_c(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: + float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j bint err = False, is_monotonic_increasing_bounds int midpoint, ret = 0 @@ -1010,7 +1022,7 @@ cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs, return result -def roll_max(ndarray[float64_t] values, ndarray[int64_t] start, +def roll_max(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1031,10 +1043,12 @@ def roll_max(ndarray[float64_t] values, ndarray[int64_t] start, ------- np.ndarray[float] """ + cdef: + ndarray[float64_t] values = ensure_float64(float_complex_values) return _roll_min_max(values, start, end, minp, is_max=1) -def roll_min(ndarray[float64_t] values, ndarray[int64_t] start, +def roll_min(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: """ Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1052,6 +1066,8 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start, ------- np.ndarray[float] """ + cdef: + ndarray[float64_t] values = ensure_float64(float_complex_values) return _roll_min_max(values, start, end, minp, is_max=0) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 93e2f8ba6b6af..6a3170e9968d3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -608,9 +608,6 @@ def calc(x): ) self._check_window_bounds(start, end, len(x)) - if is_complex_dtype(x): - x = ensure_float64(x) - return func(x, start, end, min_periods, *numba_args) with np.errstate(all="ignore"): From beaa1aef5e3234f35c78621f6da6f00c63788abb Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 19:30:45 +0800 Subject: [PATCH 07/12] remove type dependency --- pandas/_libs/window/aggregations.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index f24779427b219..85e66d5fab09a 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -15,7 +15,6 @@ import numpy as np cimport numpy as cnp from numpy cimport ( complex64_t, - complex128_t, float32_t, float64_t, int64_t, From 1dcfa05c5d94f00d9e4a478b52f0025dda2d84d4 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 21:14:37 +0800 Subject: [PATCH 08/12] add buffer source array is read-only --- pandas/tests/window/test_rolling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 49eefa66f802b..46f957ad40e5d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -572,7 +572,6 @@ def test_rolling_axis_count(axis_frame): def test_readonly_array(): # GH-27766 arr = np.array([1, 3, np.nan, 3, 5]) - arr.setflags(write=False) result = Series(arr).rolling(2).mean() expected = Series([np.nan, 2, np.nan, np.nan, 4]) tm.assert_series_equal(result, expected) From 6e4259bbfa02053cf43f33881ba691ea07179d0d Mon Sep 17 00:00:00 2001 From: weikhor Date: Mon, 23 May 2022 23:48:32 +0800 Subject: [PATCH 09/12] revert --- pandas/_libs/window/aggregations.pyx | 33 ++++++++-------------------- pandas/tests/window/test_rolling.py | 1 + 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 85e66d5fab09a..68c05f2bb2c98 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -14,7 +14,6 @@ import numpy as np cimport numpy as cnp from numpy cimport ( - complex64_t, float32_t, float64_t, int64_t, @@ -27,11 +26,6 @@ from pandas._libs.algos import is_monotonic from pandas._libs.dtypes cimport numeric_t -from pandas.core.dtypes.common import ensure_float64 - -ctypedef fused float_complex_t: - float64_t - complex64_t cdef extern from "../src/skiplist.h": ctypedef struct node_t: @@ -135,10 +129,9 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, sum_x[0] = t -def roll_sum(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_sum(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: - float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j float64_t sum_x, compensation_add, compensation_remove, prev_value int64_t s, e, num_consecutive_same_value @@ -241,6 +234,7 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, num_consecutive_same_value[0] = 1 prev_value[0] = val + cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct, float64_t *compensation) nogil: """ remove a value from the mean calc using Kahan summation """ @@ -257,10 +251,9 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_mean(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: - float64_t[:] values = ensure_float64(float_complex_values) float64_t val, compensation_add, compensation_remove, sum_x, prev_value int64_t s, e, num_consecutive_same_value Py_ssize_t nobs, i, j, neg_ct, N = len(start) @@ -394,13 +387,12 @@ cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = 0 -def roll_var(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_var(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int ddof=1) -> np.ndarray: """ Numerically stable implementation using Welford's method. """ cdef: - float64_t[:] values = ensure_float64(float_complex_values) float64_t mean_x, ssqdm_x, nobs, compensation_add, float64_t compensation_remove, prev_value int64_t s, e, num_consecutive_same_value @@ -570,10 +562,9 @@ cdef inline void remove_skew(float64_t val, int64_t *nobs, xxx[0] = t -def roll_skew(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: - float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j float64_t val, prev, min_val, mean_val, sum_val = 0 float64_t compensation_xxx_add, compensation_xxx_remove @@ -784,10 +775,9 @@ cdef inline void remove_kurt(float64_t val, int64_t *nobs, xxxx[0] = t -def roll_kurt(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: - float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j float64_t val, prev, mean_val, min_val, sum_val = 0 float64_t compensation_xxxx_add, compensation_xxxx_remove @@ -879,10 +869,9 @@ def roll_kurt(float_complex_t[:] float_complex_values, ndarray[int64_t] start, # Rolling median, min, max -def roll_median_c(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: - float64_t[:] values = ensure_float64(float_complex_values) Py_ssize_t i, j bint err = False, is_monotonic_increasing_bounds int midpoint, ret = 0 @@ -1021,7 +1010,7 @@ cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs, return result -def roll_max(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_max(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1042,12 +1031,10 @@ def roll_max(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ------- np.ndarray[float] """ - cdef: - ndarray[float64_t] values = ensure_float64(float_complex_values) return _roll_min_max(values, start, end, minp, is_max=1) -def roll_min(float_complex_t[:] float_complex_values, ndarray[int64_t] start, +def roll_min(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: """ Moving min of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1065,8 +1052,6 @@ def roll_min(float_complex_t[:] float_complex_values, ndarray[int64_t] start, ------- np.ndarray[float] """ - cdef: - ndarray[float64_t] values = ensure_float64(float_complex_values) return _roll_min_max(values, start, end, minp, is_max=0) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 46f957ad40e5d..49eefa66f802b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -572,6 +572,7 @@ def test_rolling_axis_count(axis_frame): def test_readonly_array(): # GH-27766 arr = np.array([1, 3, np.nan, 3, 5]) + arr.setflags(write=False) result = Series(arr).rolling(2).mean() expected = Series([np.nan, 2, np.nan, np.nan, 4]) tm.assert_series_equal(result, expected) From 115feaf4a9d510234764dedeae0ddd7ca47c0078 Mon Sep 17 00:00:00 2001 From: weikhor Date: Tue, 24 May 2022 00:05:12 +0800 Subject: [PATCH 10/12] update complex --- pandas/core/window/rolling.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 27512f3be9e0e..56f748d5b1c11 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -606,6 +606,25 @@ def calc(x): ) self._check_window_bounds(start, end, len(x)) + arithmetic_win_operators = [ + "sum", + "mean", + "median", + "max", + "min", + "var", + "std", + "kurt", + "skew", + "count", + "sem", + ] + + if is_complex_dtype(x) and any( + ext in func.__name__ for ext in arithmetic_win_operators + ): + x = ensure_float64(x) + return func(x, start, end, min_periods, *numba_args) with np.errstate(all="ignore"): From 7bdde2460b26ffffd15c47fdf83bb9f9df1039e4 Mon Sep 17 00:00:00 2001 From: weikhor Date: Tue, 24 May 2022 00:06:23 +0800 Subject: [PATCH 11/12] update complex --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 56f748d5b1c11..ced346182ef7b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -606,7 +606,7 @@ def calc(x): ) self._check_window_bounds(start, end, len(x)) - arithmetic_win_operators = [ + arithmetic_operators = [ "sum", "mean", "median", @@ -621,7 +621,7 @@ def calc(x): ] if is_complex_dtype(x) and any( - ext in func.__name__ for ext in arithmetic_win_operators + operator in func.__name__ for operator in arithmetic_operators ): x = ensure_float64(x) From 9f275deda6f261c3b4bc04d10ac677a96165f0f0 Mon Sep 17 00:00:00 2001 From: weikhor Date: Mon, 6 Jun 2022 19:41:27 +0800 Subject: [PATCH 12/12] add test --- pandas/_libs/window/aggregations.pyx | 105 +++++++++++++++++++++------ pandas/core/window/rolling.py | 21 +----- 2 files changed, 84 insertions(+), 42 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 68c05f2bb2c98..a81c9182c04d2 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -14,6 +14,7 @@ import numpy as np cimport numpy as cnp from numpy cimport ( + complex64_t, float32_t, float64_t, int64_t, @@ -65,6 +66,10 @@ cdef bint is_monotonic_increasing_start_end_bounds( ): return is_monotonic(start, False)[0] and is_monotonic(end, False)[0] +ctypedef fused float_complex_types: + float64_t + complex64_t + # ---------------------------------------------------------------------- # Rolling sum @@ -129,7 +134,7 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, sum_x[0] = t -def roll_sum(const float64_t[:] values, ndarray[int64_t] start, +def roll_sum(float_complex_types[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j @@ -151,25 +156,39 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start, e = end[i] if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: - + if float_complex_types is complex64_t: + prev_value = values[s].real + else: + prev_value = values[s] # setup - prev_value = values[s] num_consecutive_same_value = 0 sum_x = compensation_add = compensation_remove = 0 nobs = 0 for j in range(s, e): - add_sum(values[j], &nobs, &sum_x, &compensation_add, + if float_complex_types is complex64_t: + prev_value = values[j].real + else: + prev_value = values[j] + add_sum(prev_value, &nobs, &sum_x, &compensation_add, &num_consecutive_same_value, &prev_value) else: # calculate deletes + if float_complex_types is complex64_t: + prev_value = values[j].real + else: + prev_value = values[j] for j in range(start[i - 1], s): - remove_sum(values[j], &nobs, &sum_x, &compensation_remove) + remove_sum(prev_value, &nobs, &sum_x, &compensation_remove) # calculate adds for j in range(end[i - 1], e): - add_sum(values[j], &nobs, &sum_x, &compensation_add, + if float_complex_types is complex64_t: + prev_value = values[j].real + else: + prev_value = values[j] + add_sum(prev_value, &nobs, &sum_x, &compensation_add, &num_consecutive_same_value, &prev_value) output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value) @@ -251,7 +270,7 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(const float64_t[:] values, ndarray[int64_t] start, +def roll_mean(float_complex_types[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: float64_t val, compensation_add, compensation_remove, sum_x, prev_value @@ -276,10 +295,18 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, # setup compensation_add = compensation_remove = sum_x = 0 nobs = neg_ct = 0 - prev_value = values[s] + + if float_complex_types is complex64_t: + prev_value = values[s].real + else: + prev_value = values[s] + num_consecutive_same_value = 0 for j in range(s, e): - val = values[j] + if float_complex_types is complex64_t: + val = values[j].real + else: + val = values[j] add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, &num_consecutive_same_value, &prev_value) @@ -287,12 +314,18 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, # calculate deletes for j in range(start[i - 1], s): - val = values[j] + if float_complex_types is complex64_t: + val = values[j].real + else: + val = values[j] remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove) # calculate adds for j in range(end[i - 1], e): - val = values[j] + if float_complex_types is complex64_t: + val = values[j].real + else: + val = values[j] add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, &num_consecutive_same_value, &prev_value) @@ -387,14 +420,14 @@ cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = 0 -def roll_var(const float64_t[:] values, ndarray[int64_t] start, +def roll_var(float_complex_types[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp, int ddof=1) -> np.ndarray: """ Numerically stable implementation using Welford's method. """ cdef: float64_t mean_x, ssqdm_x, nobs, compensation_add, - float64_t compensation_remove, prev_value + float64_t compensation_remove, prev_value, val int64_t s, e, num_consecutive_same_value Py_ssize_t i, j, N = len(start) ndarray[float64_t] output @@ -417,12 +450,20 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, # never removed if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: - prev_value = values[s] + if float_complex_types is complex64_t: + prev_value = values[s].real + else: + prev_value = values[s] + num_consecutive_same_value = 0 mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0 for j in range(s, e): - add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add, + if float_complex_types is complex64_t: + val = values[j].real + else: + val = values[j] + add_var(val, &nobs, &mean_x, &ssqdm_x, &compensation_add, &num_consecutive_same_value, &prev_value) else: @@ -432,12 +473,20 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, # calculate deletes for j in range(start[i - 1], s): - remove_var(values[j], &nobs, &mean_x, &ssqdm_x, + if float_complex_types is complex64_t: + val = values[j].real + else: + val = values[j] + remove_var(val, &nobs, &mean_x, &ssqdm_x, &compensation_remove) # calculate adds for j in range(end[i - 1], e): - add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add, + if float_complex_types is complex64_t: + val = values[j].real + else: + val = values[j] + add_var(val, &nobs, &mean_x, &ssqdm_x, &compensation_add, &num_consecutive_same_value, &prev_value) output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value) @@ -562,7 +611,7 @@ cdef inline void remove_skew(float64_t val, int64_t *nobs, xxx[0] = t -def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, +def roll_skew(ndarray[float_complex_types] fused_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j @@ -572,10 +621,16 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, float64_t compensation_x_add, compensation_x_remove float64_t x, xx, xxx float64_t prev_value - int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0 + ndarray[float64_t] values + int64_t nobs = 0, N = len(start), V = len(fused_values), nobs_mean = 0 int64_t s, e, num_consecutive_same_value ndarray[float64_t] output, mean_array, values_copy - bint is_monotonic_increasing_bounds + bint is_monotonic_increasing_bound + + if float_complex_types is complex64_t: + values = fused_values.real.astype(float) + else: + values = fused_values minp = max(minp, 3) is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( @@ -775,7 +830,7 @@ cdef inline void remove_kurt(float64_t val, int64_t *nobs, xxxx[0] = t -def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, +def roll_kurt(ndarray[float_complex_types] fused_values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j @@ -786,11 +841,17 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, float64_t compensation_x_remove, compensation_x_add float64_t x, xx, xxx, xxxx float64_t prev_value + ndarray[float64_t] values int64_t nobs, s, e, num_consecutive_same_value - int64_t N = len(start), V = len(values), nobs_mean = 0 + int64_t N = len(start), V = len(fused_values), nobs_mean = 0 ndarray[float64_t] output, values_copy bint is_monotonic_increasing_bounds + if float_complex_types is complex64_t: + values = fused_values.real.astype(float) + else: + values = fused_values + minp = max(minp, 4) is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( start, end diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 8e1b9862b0317..e6b20cb085539 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -605,26 +605,7 @@ def calc(x): step=self.step, ) self._check_window_bounds(start, end, len(x)) - - arithmetic_operators = [ - "sum", - "mean", - "median", - "max", - "min", - "var", - "std", - "kurt", - "skew", - "count", - "sem", - ] - - if is_complex_dtype(x) and any( - operator in func.__name__ for operator in arithmetic_operators - ): - x = ensure_float64(x) - + # x = ensure_float64(x) return func(x, start, end, min_periods, *numba_args) with np.errstate(all="ignore"):