
CLN: rolling.py and window/aggregations.pyx #38768


Merged: 5 commits, Dec 29, 2020
pandas/_libs/window/aggregations.pyx (9 additions, 39 deletions)
@@ -55,39 +55,11 @@ cdef:

    float64_t NaN = <float64_t>np.NaN

-cdef inline int int_max(int a, int b): return a if a >= b else b
-cdef inline int int_min(int a, int b): return a if a <= b else b
-
cdef bint is_monotonic_increasing_start_end_bounds(
    ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
):
    return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]

-# Cython implementations of rolling sum, mean, variance, skewness,
-# other statistical moment functions
-#
-# Misc implementation notes
-# -------------------------
-#
-# - In Cython x * x is faster than x ** 2 for C types, this should be
-#   periodically revisited to see if it's still true.
-#
-
-# original C implementation by N. Devillard.
-# This code in public domain.
-# Function : kth_smallest()
-# In       : array of elements, # of elements in the array, rank k
-# Out      : one element
-# Job      : find the kth smallest element in the array
-
-# Reference:
-
-# Author: Wirth, Niklaus
-# Title: Algorithms + data structures = programs
-# Publisher: Englewood Cliffs: Prentice-Hall, 1976
-# Physical description: 366 p.
-# Series: Prentice-Hall Series in Automatic Computation
-
# ----------------------------------------------------------------------
# Rolling sum

@@ -774,7 +746,6 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,

def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
                  ndarray[int64_t] end, int64_t minp):
-    # GH 32865. win argument kept for compatibility
    cdef:
        float64_t val, res, prev
        bint err = False
@@ -1167,9 +1138,8 @@ def roll_apply(object obj,
    arr = np.asarray(obj)

    # ndarray input
-    if raw:
-        if not arr.flags.c_contiguous:
-            arr = arr.copy('C')
+    if raw and not arr.flags.c_contiguous:
+        arr = arr.copy('C')

    counts = roll_sum(np.isfinite(arr).astype(float), start, end, minp)

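A side note on the collapsed branch above (illustration only, not part of the diff): with raw=True the window ndarray is handed directly to the user's function, so the buffer is copied to C order first unless it is already C-contiguous. A minimal sketch of the same guard in plain NumPy:

import numpy as np

a = np.arange(10.0)[::2]          # strided view: not C-contiguous
assert not a.flags.c_contiguous
if not a.flags.c_contiguous:      # same guard as the collapsed branch
    a = a.copy('C')               # 'C' requests row-major (C-order) layout
assert a.flags.c_contiguous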
@@ -1195,17 +1165,17 @@
# Rolling sum and mean for weighted window


-def roll_weighted_sum(float64_t[:] values, float64_t[:] weights, int minp):
+def roll_weighted_sum(const float64_t[:] values, const float64_t[:] weights, int minp):
    return _roll_weighted_sum_mean(values, weights, minp, avg=0)


-def roll_weighted_mean(float64_t[:] values, float64_t[:] weights, int minp):
+def roll_weighted_mean(const float64_t[:] values, const float64_t[:] weights, int minp):
    return _roll_weighted_sum_mean(values, weights, minp, avg=1)


-cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
-                                                float64_t[:] weights,
-                                                int minp, bint avg):
+cdef float64_t[:] _roll_weighted_sum_mean(const float64_t[:] values,
+                                          const float64_t[:] weights,
+                                          int minp, bint avg):
    """
    Assume len(weights) << len(values)
    """
@@ -1270,7 +1240,7 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
        if c < minp:
            output[in_i] = NaN

-    return np.asarray(output)
+    return output


# ----------------------------------------------------------------------
@@ -1424,7 +1394,7 @@ cdef inline void remove_weighted_var(float64_t val,
        mean[0] = 0


-def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
+def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
                      int64_t minp, unsigned int ddof):
    """
    Calculates weighted rolling variance using West's online algorithm.
pandas/core/window/rolling.py (7 additions, 13 deletions)
@@ -174,9 +174,8 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries:
        Split data into blocks & return conformed data.
        """
        # filter out the on from the object
-        if self.on is not None and not isinstance(self.on, Index):
-            if obj.ndim == 2:
-                obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
+        if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
+            obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
        if self.axis == 1:
            # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything
            # to float to calculate the complete row at once. We exclude all non-numeric
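For readers unfamiliar with the reindex idiom in this hunk, a small standalone example (the column names and frame are hypothetical, not from the PR): columns.difference([self.on]) keeps every column except the on column, so the window computation never sees it.

import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0],
                   "t": pd.date_range("2020-01-01", periods=2)})
# drop a hypothetical `on` column "t" from the working frame
trimmed = df.reindex(columns=df.columns.difference(["t"]), copy=False)
print(list(trimmed.columns))      # ['a', 'b']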
@@ -238,10 +237,6 @@ def _get_cov_corr_window(
"""
return self.window

@property
def _window_type(self) -> str:
return type(self).__name__

def __repr__(self) -> str:
"""
Provide a nice str repr of our rolling object.
@@ -252,7 +247,7 @@ def __repr__(self) -> str:
            if getattr(self, attr_name, None) is not None
        )
        attrs = ",".join(attrs_list)
-        return f"{self._window_type} [{attrs}]"
+        return f"{type(self).__name__} [{attrs}]"

    def __iter__(self):
        obj = self._create_data(self._selected_obj)
@@ -278,7 +273,7 @@ def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:

        if needs_i8_conversion(values.dtype):
            raise NotImplementedError(
-                f"ops for {self._window_type} for this "
+                f"ops for {type(self).__name__} for this "
                f"dtype {values.dtype} are not implemented"
            )
        else:
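The replacement of the _window_type property with type(self).__name__ inlined at both call sites is behavior-preserving; a minimal sketch (class name and attrs hypothetical):

class Rolling:
    def __repr__(self) -> str:
        # type(self).__name__ picks up the subclass name automatically
        return f"{type(self).__name__} [window=3,center=False]"

print(repr(Rolling()))            # Rolling [window=3,center=False]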
@@ -464,7 +459,6 @@ def calc(x):
                result = np.apply_along_axis(calc, self.axis, values)
            else:
                result = calc(values)
-                result = np.asarray(result)

            if numba_cache_key is not None:
                NUMBA_FUNC_CACHE[numba_cache_key] = func
@@ -1102,8 +1096,8 @@ def calc(x):
            if values.ndim > 1:
                result = np.apply_along_axis(calc, self.axis, values)
            else:
-                result = calc(values)
-                result = np.asarray(result)
+                # Our weighted aggregations return memoryviews
+                result = np.asarray(calc(values))

            if self.center:
                result = self._center_window(result, offset)
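On the np.asarray(calc(values)) line above (illustration only, not part of the diff): _roll_weighted_sum_mean now returns a typed memoryview rather than an ndarray, and np.asarray wraps that buffer as an ndarray without copying the data. A plain-Python stand-in for the same wrapping:

import numpy as np

buf = memoryview(np.zeros(4))     # stand-in for a Cython float64_t[:] return
out = np.asarray(buf)             # wraps the same buffer; no data copy
out[0] = 1.0
assert np.asarray(buf)[0] == 1.0  # both views share the underlying memory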
@@ -2158,7 +2152,7 @@ def _validate_monotonic(self):
"""
Validate that on is monotonic;
in this case we have to check only for nans, because
monotonicy was already validated at a higher level.
monotonicity was already validated at a higher level.
"""
if self._on.hasnans:
self._raise_monotonic_error()