diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 3ba4c2375b4e8..2b60b0dc01d19 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -77,7 +77,9 @@ class NegInfinity(object):
     __ge__ = lambda self, other: isinstance(other, NegInfinity)
 
 
-cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cpdef ndarray[int64_t, ndim=1] unique_deltas(int64_t[:] arr):
     """
     Efficiently find the unique first-differences of the given array.
 
@@ -98,6 +100,8 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
         int ret = 0
         list uniques = []
 
+    util.require_not_none(arr)
+
     table = kh_init_int64()
     kh_resize_int64(table, 10)
     for i in range(n - 1):
@@ -151,7 +155,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
+def groupsort_indexer(int64_t[:] index, Py_ssize_t ngroups):
     """
     compute a 1-d indexer that is an ordering of the passed index,
     ordered by the groups. This is a reverse of the label
@@ -171,6 +175,8 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
         Py_ssize_t i, loc, label, n
         ndarray[int64_t] counts, where, result
 
+    util.require_not_none(index)
+
     counts = np.zeros(ngroups + 1, dtype=np.int64)
     n = len(index)
     result = np.zeros(n, dtype=np.int64)
@@ -236,7 +242,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
         Py_ssize_t i, j, xi, yi, N, K
         bint minpv
         ndarray[float64_t, ndim=2] result
-        ndarray[uint8_t, ndim=2] mask
+        uint8_t[:, :] mask
         int64_t nobs = 0
         float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor
 
@@ -301,7 +307,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
         ndarray[float64_t, ndim=2] result
         ndarray[float64_t, ndim=1] maskedx
         ndarray[float64_t, ndim=1] maskedy
-        ndarray[uint8_t, ndim=2] mask
+        uint8_t[:, :] mask
         int64_t nobs = 0
         float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor
 
@@ -373,7 +379,7 @@ ctypedef fused algos_t:
 # TODO: unused; needed?
 @cython.wraparound(False)
 @cython.boundscheck(False)
-cpdef map_indices(ndarray[algos_t] index):
+cpdef map_indices(algos_t[:] index):
     """
     Produce a dict mapping the values of the input array to their respective
     locations.
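Note (not part of the patch): the `util.require_not_none` calls being added throughout exist because the two argument styles treat None differently. A minimal Cython sketch of the difference, with hypothetical function names; the `not None` variant shown last is the language's built-in alternative:

```cython
# Hedged sketch: why memoryview arguments need an explicit None check.
from numpy cimport ndarray, int64_t

def first_buffer(ndarray[int64_t] arr):
    # A buffer-typed argument rejects None at call time with a TypeError.
    return arr[0]

def first_memview(int64_t[:] arr):
    # A typed-memoryview argument binds None silently; without a guard the
    # failure surfaces later with a confusing error (or, under
    # boundscheck(False), possibly not at all) -- exactly what
    # require_not_none protects against.
    if arr is None:
        raise TypeError("An array or memoryview is required, not None.")
    return arr[0]

def first_strict(int64_t[:] arr not None):
    # Built-in alternative: `not None` rejects None in the signature itself.
    return arr[0]
```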
@@ -387,6 +393,8 @@ cpdef map_indices(ndarray[algos_t] index):
         Py_ssize_t i, length
         dict result = {}
 
+    util.require_not_none(index)
+
     length = len(index)
 
     for i in range(length):
@@ -397,13 +405,16 @@ cpdef map_indices(ndarray[algos_t] index):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
+def pad(algos_t[:] old, algos_t[:] new, limit=None):
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[int64_t, ndim=1] indexer
         algos_t cur, next
         int lim, fill_count = 0
 
+    util.require_not_none(old)
+    util.require_not_none(new)
+
     nleft = len(old)
     nright = len(new)
     indexer = np.empty(nright, dtype=np.int64)
@@ -475,14 +486,15 @@ pad_bool = pad["uint8_t"]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad_inplace(ndarray[algos_t] values,
-                ndarray[uint8_t, cast=True] mask,
-                limit=None):
+def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
     cdef:
         Py_ssize_t i, N
         algos_t val
         int lim, fill_count = 0
 
+    util.require_not_none(values)
+    util.require_not_none(mask)
+
     N = len(values)
 
     # GH#2778
@@ -521,14 +533,15 @@ pad_inplace_bool = pad_inplace["uint8_t"]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
-                   ndarray[uint8_t, ndim=2] mask,
-                   limit=None):
+def pad_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
     cdef:
         Py_ssize_t i, j, N, K
         algos_t val
         int lim, fill_count = 0
 
+    util.require_not_none(values)
+    util.require_not_none(mask)
+
     K, N = (<object> values).shape
 
     # GH#2778
@@ -595,13 +608,16 @@ D
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
+def backfill(algos_t[:] old, algos_t[:] new, limit=None):
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[int64_t, ndim=1] indexer
         algos_t cur, prev
         int lim, fill_count = 0
 
+    util.require_not_none(old)
+    util.require_not_none(new)
+
     nleft = len(old)
     nright = len(new)
     indexer = np.empty(nright, dtype=np.int64)
@@ -674,14 +690,15 @@ backfill_bool = backfill["uint8_t"]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill_inplace(ndarray[algos_t] values,
-                     ndarray[uint8_t, cast=True] mask,
-                     limit=None):
+def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
     cdef:
         Py_ssize_t i, N
         algos_t val
         int lim, fill_count = 0
 
+    util.require_not_none(values)
+    util.require_not_none(mask)
+
     N = len(values)
 
     # GH#2778
@@ -720,14 +737,15 @@ backfill_inplace_bool = backfill_inplace["uint8_t"]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
-                        ndarray[uint8_t, ndim=2] mask,
-                        limit=None):
+def backfill_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
     cdef:
         Py_ssize_t i, j, N, K
         algos_t val
         int lim, fill_count = 0
 
+    util.require_not_none(values)
+    util.require_not_none(mask)
+
     K, N = (<object> values).shape
 
     # GH#2778
@@ -768,14 +786,16 @@ backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"]
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def arrmap(ndarray[algos_t] index, object func):
+def arrmap(algos_t[:] index, object func):
     cdef:
         Py_ssize_t length = index.shape[0]
         Py_ssize_t i = 0
 
-        ndarray[object] result = np.empty(length, dtype=np.object_)
+        object[:] result = np.empty(length, dtype=np.object_)
 
     from pandas._libs.lib import maybe_convert_objects
 
+    util.require_not_none(index)
+
     for i in range(length):
         result[i] = func(index[i])
 
@@ -793,7 +813,7 @@ arrmap_bool = arrmap["uint8_t"]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def is_monotonic(ndarray[algos_t] arr, bint timelike):
+def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
     """
     Returns
     -------
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
index 2835c95c96575..a10f0493166a0 100644
--- a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -29,7 +29,7 @@ def get_dispatch(dtypes):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
-                     ndarray[{{dest_type}}, ndim=2] out,
+                     {{dest_type}}[:, :] out,
                      Py_ssize_t periods, int axis):
     cdef:
         Py_ssize_t i, j, sx, sy
@@ -72,8 +72,8 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
 
 def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
-                                 ndarray[int64_t] indexer, Py_ssize_t loc,
-                                 ndarray[{{dest_type}}] out):
+                                 int64_t[:] indexer, Py_ssize_t loc,
+                                 {{dest_type}}[:, :] out):
     cdef:
         Py_ssize_t i, j, k
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index d683c93c9b32e..45d9c3f111b5d 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -14,7 +14,7 @@ from numpy cimport (ndarray,
 
 cnp.import_array()
 
-from util cimport numeric, get_nat
+from util cimport numeric, get_nat, require_not_none
 
 from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
                     TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
@@ -98,21 +98,24 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_median_float64(ndarray[float64_t, ndim=2] out,
-                         ndarray[int64_t] counts,
+def group_median_float64(float64_t[:, :] out,
+                         int64_t[:] counts,
                          ndarray[float64_t, ndim=2] values,
-                         ndarray[int64_t] labels,
+                         int64_t[:] labels,
                          Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
     cdef:
         Py_ssize_t i, j, N, K, ngroups, size
-        ndarray[int64_t] _counts
-        ndarray data
+        int64_t[:] _counts
+        ndarray[float64_t, ndim=2] data
         float64_t* ptr
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
+    require_not_none(counts)
+    require_not_none(out)
+    require_not_none(labels)
     ngroups = len(counts)
     N, K = (<object> values).shape
@@ -217,7 +220,7 @@ def group_cumsum(numeric[:, :] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
+def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
                         int ngroups, int periods):
     cdef:
         Py_ssize_t N, i, j, ii
@@ -269,8 +272,8 @@ def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
-                         ndarray[uint8_t] mask, object direction,
+def group_fillna_indexer(int64_t[:] out, ndarray[int64_t] labels,
+                         uint8_t[:] mask, object direction,
                          int64_t limit):
     """Indexes how to fill values forwards or backwards within a group
 
@@ -291,7 +294,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     """
     cdef:
         Py_ssize_t i, N
-        ndarray[int64_t] sorted_labels
+        int64_t[:] sorted_labels
         int64_t idx, curr_fill_idx=-1, filled_vals=0
 
     N = len(out)
@@ -301,6 +304,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     sorted_labels = np.argsort(labels, kind='mergesort').astype(
         np.int64, copy=False)
+
     if direction == 'bfill':
         sorted_labels = sorted_labels[::-1]
 
@@ -327,10 +331,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_any_all(ndarray[uint8_t] out,
-                  ndarray[int64_t] labels,
-                  ndarray[uint8_t] values,
-                  ndarray[uint8_t] mask,
+def group_any_all(uint8_t[:] out,
+                  int64_t[:] labels,
+                  uint8_t[:] values,
+                  uint8_t[:] mask,
                   object val_test, bint skipna):
     """Aggregated boolean values to show truthfulness of group elements
 
@@ -353,10 +357,15 @@ def group_any_all(ndarray[uint8_t] out,
     The returned values will either be 0 or 1 (False or True, respectively).
     """
     cdef:
-        Py_ssize_t i, N=len(labels)
+        Py_ssize_t i, N = len(labels)
         int64_t lab
         uint8_t flag_val
 
+    require_not_none(out)
+    require_not_none(labels)
+    require_not_none(values)
+    require_not_none(mask)
+
     if val_test == 'all':
         # Because the 'all' value of an empty iterable in Python is True we can
         # start with an array full of ones and set to zero when a False value
@@ -370,7 +379,7 @@ def group_any_all(ndarray[uint8_t] out,
     else:
         raise ValueError("'bool_func' must be either 'any' or 'all'!")
 
-    out.fill(1 - flag_val)
+    out[:] = 1 - flag_val
 
     with nogil:
         for i in range(N):
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 335c8ee5c2340..b626d4cf0faf1 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -30,9 +30,9 @@ def get_dispatch(dtypes):
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
-                       ndarray[int64_t] counts,
+                       int64_t[:] counts,
                        ndarray[{{c_type}}, ndim=2] values,
-                       ndarray[int64_t] labels,
+                       int64_t[:] labels,
                        Py_ssize_t min_count=0):
     """
     Only aggregates on axis=0
@@ -40,7 +40,9 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{c_type}} val, count
-        ndarray[{{c_type}}, ndim=2] sumx, nobs
+        {{c_type}}[:, :] sumx, nobs
+
+    require_not_none(counts)
 
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
@@ -77,9 +79,9 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
-                        ndarray[int64_t] counts,
+                        int64_t[:] counts,
                         ndarray[{{c_type}}, ndim=2] values,
-                        ndarray[int64_t] labels,
+                        int64_t[:] labels,
                         Py_ssize_t min_count=0):
     """
     Only aggregates on axis=0
@@ -87,7 +89,9 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{c_type}} val, count
-        ndarray[{{c_type}}, ndim=2] prodx, nobs
+        {{c_type}}[:, :] prodx, nobs
+
+    require_not_none(counts)
 
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
@@ -124,15 +128,16 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
 @cython.boundscheck(False)
 @cython.cdivision(True)
 def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
-                       ndarray[int64_t] counts,
+                       int64_t[:] counts,
                        ndarray[{{c_type}}, ndim=2] values,
-                       ndarray[int64_t] labels,
+                       int64_t[:] labels,
                        Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{c_type}} val, ct, oldmean
-        ndarray[{{c_type}}, ndim=2] nobs, mean
+        {{c_type}}[:, :] nobs, mean
 
+    require_not_none(counts)
     assert min_count == -1, "'min_count' only used in add and prod"
 
     if not len(values) == len(labels):
@@ -176,15 +181,16 @@ def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
-                        ndarray[int64_t] counts,
+                        int64_t[:] counts,
                         ndarray[{{c_type}}, ndim=2] values,
                         ndarray[int64_t] labels,
                        Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{c_type}} val, count
-        ndarray[{{c_type}}, ndim=2] sumx, nobs
+        {{c_type}}[:, :] sumx, nobs
 
+    require_not_none(counts)
     assert min_count == -1, "'min_count' only used in add and prod"
 
     if not len(values) == len(labels):
@@ -220,10 +226,10 @@ def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
-                        ndarray[int64_t] counts,
+def group_ohlc_{{name}}({{c_type}}[:, :] out,
+                        int64_t[:] counts,
                         ndarray[{{c_type}}, ndim=2] values,
-                        ndarray[int64_t] labels,
+                        int64_t[:] labels,
                         Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
@@ -233,6 +239,7 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
         {{c_type}} val, count
         Py_ssize_t ngroups = len(counts)
 
+    require_not_none(counts)
     assert min_count == -1, "'min_count' only used in add and prod"
 
     if len(labels) == 0:
@@ -246,7 +253,8 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     if K > 1:
         raise NotImplementedError("Argument 'values' must have only "
                                   "one dimension")
-    out.fill(np.nan)
+
+    out[:] = np.nan
 
     with nogil:
         for i in range(N):
@@ -304,8 +312,8 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{c_type}} val
-        ndarray[{{c_type}}, ndim=2] resx
-        ndarray[int64_t, ndim=2] nobs
+        {{c_type}}[:, :] resx
+        int64_t[:, :] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -361,8 +369,8 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{c_type}} val
-        ndarray[{{c_type}}, ndim=2] resx
-        ndarray[int64_t, ndim=2] nobs
+        {{c_type}}[:, :] resx
+        int64_t[:, :] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -411,7 +419,7 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
+def group_rank_{{name}}(float64_t[:, :] out,
                         ndarray[{{c_type}}, ndim=2] values,
                         ndarray[int64_t] labels,
                         bint is_datetimelike, object ties_method,
@@ -453,8 +461,8 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
         TiebreakEnumType tiebreak
         Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0
         Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
-        ndarray[int64_t] _as
-        ndarray[float64_t, ndim=2] grp_sizes
+        int64_t[:] _as
+        float64_t[:, :] grp_sizes
         ndarray[{{c_type}}] masked_vals
         ndarray[uint8_t] mask
         bint keep_na
@@ -617,7 +625,7 @@ def group_max(ndarray[groupby_t, ndim=2] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         groupby_t val, count, nan_val
-        ndarray[groupby_t, ndim=2] maxx, nobs
+        groupby_t[:, :] maxx, nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -629,10 +637,10 @@ def group_max(ndarray[groupby_t, ndim=2] out,
     maxx = np.empty_like(out)
     if groupby_t is int64_t:
         # Note: evaluated at compile-time
-        maxx.fill(-_int64_max)
+        maxx[:] = -_int64_max
         nan_val = iNaT
     else:
-        maxx.fill(-np.inf)
+        maxx[:] = -np.inf
         nan_val = NAN
 
     N, K = (<object> values).shape
@@ -667,11 +675,6 @@ def group_max(ndarray[groupby_t, ndim=2] out,
                 out[i, j] = maxx[i, j]
 
 
-group_max_float64 = group_max["float64_t"]
-group_max_float32 = group_max["float32_t"]
-group_max_int64 = group_max["int64_t"]
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_min(ndarray[groupby_t, ndim=2] out,
@@ -685,7 +688,7 @@ def group_min(ndarray[groupby_t, ndim=2] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         groupby_t val, count, nan_val
-        ndarray[groupby_t, ndim=2] minx, nobs
+        groupby_t[:, :] minx, nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -696,10 +699,10 @@ def group_min(ndarray[groupby_t, ndim=2] out,
 
     minx = np.empty_like(out)
     if groupby_t is int64_t:
-        minx.fill(_int64_max)
+        minx[:] = _int64_max
        nan_val = iNaT
     else:
-        minx.fill(np.inf)
+        minx[:] = np.inf
         nan_val = NAN
 
     N, K = (<object> values).shape
@@ -734,14 +737,9 @@ def group_min(ndarray[groupby_t, ndim=2] out,
                 out[i, j] = minx[i, j]
 
 
-group_min_float64 = group_min["float64_t"]
-group_min_float32 = group_min["float32_t"]
-group_min_int64 = group_min["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummin(ndarray[groupby_t, ndim=2] out,
+def group_cummin(groupby_t[:, :] out,
                  ndarray[groupby_t, ndim=2] values,
                  ndarray[int64_t] labels,
                  bint is_datetimelike):
@@ -751,15 +749,15 @@ def group_cummin(groupby_t[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, size
         groupby_t val, mval
-        ndarray[groupby_t, ndim=2] accum
+        groupby_t[:, :] accum
         int64_t lab
 
     N, K = (<object> values).shape
     accum = np.empty_like(values)
     if groupby_t is int64_t:
-        accum.fill(_int64_max)
+        accum[:] = _int64_max
     else:
-        accum.fill(np.inf)
+        accum[:] = np.inf
 
     with nogil:
         for i in range(N):
@@ -787,14 +785,9 @@ def group_cummin(groupby_t[:, :] out,
                     out[i, j] = mval
 
 
-group_cummin_float64 = group_cummin["float64_t"]
-group_cummin_float32 = group_cummin["float32_t"]
-group_cummin_int64 = group_cummin["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummax(ndarray[groupby_t, ndim=2] out,
+def group_cummax(groupby_t[:, :] out,
                  ndarray[groupby_t, ndim=2] values,
                  ndarray[int64_t] labels,
                  bint is_datetimelike):
@@ -804,15 +797,15 @@ def group_cummax(groupby_t[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, size
         groupby_t val, mval
-        ndarray[groupby_t, ndim=2] accum
+        groupby_t[:, :] accum
         int64_t lab
 
     N, K = (<object> values).shape
     accum = np.empty_like(values)
     if groupby_t is int64_t:
-        accum.fill(-_int64_max)
+        accum[:] = -_int64_max
     else:
-        accum.fill(-np.inf)
+        accum[:] = -np.inf
 
     with nogil:
         for i in range(N):
@@ -837,8 +830,3 @@ def group_cummax(groupby_t[:, :] out,
                 if val > mval:
                     accum[lab, j] = mval = val
                 out[i, j] = mval
-
-
-group_cummax_float64 = group_cummax["float64_t"]
-group_cummax_float32 = group_cummax["float32_t"]
-group_cummax_int64 = group_cummax["int64_t"]
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 27758234c0cf1..03f60f7637c12 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -86,12 +86,12 @@ cdef class {{name}}Vector:
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
         self.ao = np.empty(self.data.m, dtype={{idtype}})
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data
 
     cdef resize(self):
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
         self.ao.resize(self.data.m, refcheck=False)
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data
 
     def __dealloc__(self):
         if self.data is not NULL:
@@ -140,7 +140,7 @@ cdef class StringVector:
         self.external_view_exists = False
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
         if not self.data.data:
             raise MemoryError()
 
@@ -153,7 +153,7 @@ cdef class StringVector:
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
         orig_data = self.data.data
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
         if not self.data.data:
             raise MemoryError()
         for i in range(m):
@@ -208,22 +208,22 @@ cdef class ObjectVector:
         self.n = 0
         self.m = _INIT_VEC_CAP
         self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
-        self.data = <PyObject**> self.ao.data
+        self.data = <PyObject**>self.ao.data
 
     def __len__(self):
         return self.n
 
-    cdef inline append(self, object o):
+    cdef inline append(self, object obj):
         if self.n == self.m:
             if self.external_view_exists:
                 raise ValueError("external reference but "
                                  "Vector.resize() needed")
             self.m = max(self.m * 2, _INIT_VEC_CAP)
             self.ao.resize(self.m, refcheck=False)
-            self.data = <PyObject**> self.ao.data
+            self.data = <PyObject**>self.ao.data
 
-        Py_INCREF(o)
-        self.data[self.n] = <PyObject*> o
+        Py_INCREF(obj)
+        self.data[self.n] = <PyObject*>obj
         self.n += 1
 
     def to_array(self):
@@ -318,16 +318,18 @@ cdef class {{name}}HashTable(HashTable):
         for i in range(n):
             key = keys[i]
             k = kh_put_{{dtype}}(self.table, key, &ret)
-            self.table.vals[k] = <Py_ssize_t> values[i]
+            self.table.vals[k] = <Py_ssize_t>values[i]
 
     @cython.boundscheck(False)
-    def map_locations(self, ndarray[{{dtype}}_t, ndim=1] values):
+    def map_locations(self, const {{dtype}}_t[:] values):
         cdef:
             Py_ssize_t i, n = len(values)
            int ret = 0
             {{dtype}}_t val
             khiter_t k
 
+        util.require_not_none(values)
+
         with nogil:
             for i in range(n):
                 val = values[i]
@@ -590,7 +592,7 @@ cdef class StringHashTable(HashTable):
         cdef:
             Py_ssize_t i, n = len(values)
             ndarray[int64_t] labels = np.empty(n, dtype=np.int64)
-            int64_t *resbuf = <int64_t*> labels.data
+            int64_t *resbuf = <int64_t*>labels.data
             khiter_t k
             kh_str_t *table = self.table
             const char *v
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 801c67832d8b9..fbbd2b3fa2b4d 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -45,11 +45,11 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
             val = values[i]
 
             if not checknull(val) or not dropna:
-                k = kh_get_{{ttype}}(table, <PyObject*> val)
+                k = kh_get_{{ttype}}(table, <PyObject*>val)
                 if k != table.n_buckets:
                     table.vals[k] += 1
                 else:
-                    k = kh_put_{{ttype}}(table, <PyObject*> val, &ret)
+                    k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
                     table.vals[k] = 1
     {{else}}
     with nogil:
@@ -103,7 +103,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
     {{if dtype == 'object'}}
     for k in range(table.n_buckets):
         if kh_exist_{{ttype}}(table, k):
-            result_keys[i] = <{{dtype}}> table.keys[k]
+            result_keys[i] = <{{dtype}}>table.keys[k]
             result_counts[i] = table.vals[k]
             i += 1
     {{else}}
@@ -152,7 +152,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     if keep == 'last':
         {{if dtype == 'object'}}
         for i from n > i >= 0:
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
             out[i] = ret == 0
         {{else}}
         with nogil:
@@ -163,7 +163,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     elif keep == 'first':
         {{if dtype == 'object'}}
         for i in range(n):
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
             out[i] = ret == 0
         {{else}}
         with nogil:
@@ -175,13 +175,13 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
         {{if dtype == 'object'}}
         for i in range(n):
             value = values[i]
-            k = kh_get_{{ttype}}(table, <PyObject*> value)
+            k = kh_get_{{ttype}}(table, <PyObject*>value)
             if k != table.n_buckets:
                 out[table.vals[k]] = 1
                 out[i] = 1
             else:
-                k = kh_put_{{ttype}}(table, <PyObject*> value, &ret)
-                table.keys[k] = <PyObject*> value
+                k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
+                table.keys[k] = <PyObject*>value
                 table.vals[k] = i
                 out[i] = 0
         {{else}}
@@ -245,7 +245,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
 
     {{if dtype == 'object'}}
     for i in range(n):
-        kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+        kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
     {{else}}
     with nogil:
         for i in range(n):
@@ -259,7 +259,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
     {{if dtype == 'object'}}
     for i in range(n):
         val = arr[i]
-        k = kh_get_{{ttype}}(table, <PyObject*> val)
+        k = kh_get_{{ttype}}(table, <PyObject*>val)
         result[i] = (k != table.n_buckets)
     {{else}}
     with nogil:
@@ -308,7 +308,7 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
         int j = -1  # so you can do +=
         Py_ssize_t k
         kh_{{table_type}}_t *table
-        ndarray[{{ctype}}] modes
+        {{ctype}}[:] modes
 
     table = kh_init_{{table_type}}()
     build_count_table_{{dtype}}(values, table, dropna)
@@ -342,11 +342,13 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
             else:
                 continue
 
-            modes[j] = <object> table.keys[k]
+            modes[j] = <object>table.keys[k]
     {{endif}}
 
     kh_destroy_{{table_type}}(table)
 
-    return modes[:j + 1]
+    # Note: For reasons unknown, slicing modes.base works but modes[:j+1].base
+    # returns an object with an incorrect length
+    return modes.base[:j + 1]  # `.base` to access underlying np.ndarray
 
 {{endfor}}
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index c6afeda6a37dc..c92e0a4a7aa23 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t
 
 import numpy as np
 cimport numpy as cnp
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 6ec9a7e93bc55..86d4cfa44b38a 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -18,7 +18,7 @@ PyDateTime_IMPORT
 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM,
+from numpy cimport (ndarray, PyArray_GETITEM,
                     PyArray_ITER_DATA, PyArray_ITER_NEXT,
                     PyArray_IterNew, flatiter, NPY_OBJECT,
                     int64_t,
@@ -74,9 +74,9 @@ cdef bint PY2 = sys.version_info[0] == 2
 cdef double nan = np.NaN
 
 
-def values_from_object(object obj):
+def values_from_object(obj: object):
     """ return my values or the object if we are say an ndarray """
-    cdef func  # TODO: Does declaring this without a type accomplish anything?
+    func: object
 
     func = getattr(obj, 'get_values', None)
     if func is not None:
@@ -170,7 +170,7 @@ def item_from_zerodim(val: object) -> object:
 @cython.boundscheck(False)
 def fast_unique_multiple(list arrays):
     cdef:
-        ndarray[object] buf
+        object[:] buf
         Py_ssize_t k = len(arrays)
         Py_ssize_t i, j, n
         list uniques = []
@@ -195,7 +195,7 @@ def fast_unique_multiple(list arrays):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple_list(list lists, bint sort=True):
+def fast_unique_multiple_list(lists: list, sort: bint = True) -> list:
     cdef:
         list buf
         Py_ssize_t k = len(lists)
@@ -263,10 +263,10 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def dicts_to_array(list dicts, list columns):
+def dicts_to_array(dicts: list, columns: list):
     cdef:
         Py_ssize_t i, j, k, n
-        ndarray[object, ndim=2] result
+        object[:, :] result
         dict row
         object col, onan = np.nan
 
@@ -284,7 +284,7 @@ def dicts_to_array(list dicts, list columns):
             else:
                 result[i, j] = onan
 
-    return result
+    return np.asarray(result)
 
 
 def fast_zip(list ndarrays):
@@ -329,7 +329,7 @@ def fast_zip(list ndarrays):
     return result
 
 
-def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length):
+def get_reverse_indexer(int64_t[:] indexer, Py_ssize_t length):
     """
     Reverse indexing operation.
 
@@ -343,20 +343,24 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length):
 
     cdef:
         Py_ssize_t i, n = len(indexer)
-        ndarray[int64_t] rev_indexer
+        int64_t[:] rev_indexer
         int64_t idx
 
+    util.require_not_none(indexer)
+
     rev_indexer = np.empty(length, dtype=np.int64)
-    rev_indexer.fill(-1)
+    rev_indexer[:] = -1
     for i in range(n):
         idx = indexer[i]
         if idx != -1:
             rev_indexer[idx] = i
 
-    return rev_indexer
+    return np.asarray(rev_indexer)
 
 
-def has_infs_f4(ndarray[float32_t] arr) -> bint:
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def has_infs_f4(float32_t[:] arr) -> bint:
     cdef:
         Py_ssize_t i, n = len(arr)
         float32_t inf, neginf, val
@@ -371,7 +375,9 @@ def has_infs_f4(ndarray[float32_t] arr) -> bint:
     return False
 
 
-def has_infs_f8(ndarray[float64_t] arr) -> bint:
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def has_infs_f8(float64_t[:] arr) -> bint:
     cdef:
         Py_ssize_t i, n = len(arr)
         float64_t inf, neginf, val
@@ -423,6 +429,8 @@ def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len):
     return slice(vstart, vlast - 1, k)
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
 def maybe_booleans_to_slice(ndarray[uint8_t] mask):
     cdef:
         Py_ssize_t i, n = len(mask)
@@ -454,7 +462,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def array_equivalent_object(left: object[:], right: object[:]) -> bint:
+def array_equivalent_object(left: object[:], right: object[:]) -> bool:
     """ perform an element by element comparison on 1-d object arrays
     taking into account nan positions """
     cdef:
@@ -478,7 +486,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bint:
 def astype_intsafe(ndarray[object] arr, new_dtype):
     cdef:
         Py_ssize_t i, n = len(arr)
-        object v
+        object val
         bint is_datelike
         ndarray result
 
@@ -487,19 +495,18 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
     result = np.empty(n, dtype=new_dtype)
     for i in range(n):
-        v = arr[i]
-        if is_datelike and checknull(v):
+        val = arr[i]
+        if is_datelike and checknull(val):
             result[i] = NPY_NAT
         else:
-            result[i] = v
+            result[i] = val
 
     return result
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def astype_unicode(arr: ndarray,
-                   skipna: bool=False) -> ndarray[object]:
+def astype_unicode(arr: ndarray, skipna: bool=False) -> ndarray[object]:
     """
     Convert all elements in an array to unicode.
 
@@ -519,7 +526,7 @@ def astype_unicode(arr: ndarray,
     cdef:
         object arr_i
         Py_ssize_t i, n = arr.size
-        ndarray[object] result = np.empty(n, dtype=object)
+        object[:] result = np.empty(n, dtype=object)
 
     for i in range(n):
         arr_i = arr[i]
@@ -529,13 +536,12 @@ def astype_unicode(arr: ndarray,
 
             result[i] = arr_i
 
-    return result
+    return np.asarray(result)
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def astype_str(arr: ndarray,
-               skipna: bool=False) -> ndarray[object]:
+def astype_str(arr: ndarray, skipna: bool=False) -> ndarray[object]:
     """
     Convert all elements in an array to string.
 
@@ -555,7 +561,7 @@ def astype_str(arr: ndarray,
     cdef:
         object arr_i
         Py_ssize_t i, n = arr.size
-        ndarray[object] result = np.empty(n, dtype=object)
+        object[:] result = np.empty(n, dtype=object)
 
     for i in range(n):
         arr_i = arr[i]
@@ -565,24 +571,24 @@ def astype_str(arr: ndarray,
 
             result[i] = arr_i
 
-    return result
+    return np.asarray(result)
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def clean_index_list(list obj):
+def clean_index_list(obj: list):
     """
     Utility used in pandas.core.index.ensure_index
     """
     cdef:
         Py_ssize_t i, n = len(obj)
-        object v
+        object val
         bint all_arrays = 1
 
     for i in range(n):
-        v = obj[i]
-        if not (isinstance(v, list) or
-                util.is_array(v) or hasattr(v, '_data')):
+        val = obj[i]
+        if not (isinstance(val, list) or
+                util.is_array(val) or hasattr(val, '_data')):
             all_arrays = 0
             break
 
@@ -594,7 +600,7 @@ def clean_index_list(list obj):
     if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']:
         return np.asarray(obj, dtype=object), 0
     elif inferred in ['integer']:
-        # TODO: we infer an integer but it *could* be a unint64
+        # TODO: we infer an integer but it *could* be a uint64
         try:
             return np.asarray(obj, dtype='int64'), 0
         except OverflowError:
@@ -611,7 +617,7 @@ def clean_index_list(list obj):
 # is a general, O(max(len(values), len(binner))) method.
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
+def generate_bins_dt64(ndarray[int64_t] values, int64_t[:] binner,
                        object closed='left', bint hasnans=0):
     """
     Int64 (datetime64) version of generic python version in groupby.py
@@ -674,13 +680,13 @@ def generate_bins_dt64(ndarray[int64_t] values, int64_t[:] binner,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def row_bool_subset(ndarray[float64_t, ndim=2] values,
+def row_bool_subset(float64_t[:, :] values,
                     ndarray[uint8_t, cast=True] mask):
     cdef:
         Py_ssize_t i, j, n, k, pos = 0
-        ndarray[float64_t, ndim=2] out
+        float64_t[:, :] out
 
-    n, k = (<object> values).shape
+    n, k = (<object>values).shape
     assert (n == len(mask))
 
     out = np.empty((mask.sum(), k), dtype=np.float64)
@@ -691,18 +697,18 @@ def row_bool_subset(ndarray[float64_t, ndim=2] values,
                 out[pos, j] = values[i, j]
             pos += 1
 
-    return out
+    return np.asarray(out)
 
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def row_bool_subset_object(ndarray[object, ndim=2] values,
+def row_bool_subset_object(object[:, :] values,
                            ndarray[uint8_t, cast=True] mask):
     cdef:
         Py_ssize_t i, j, n, k, pos = 0
-        ndarray[object, ndim=2] out
+        object[:, :] out
 
-    n, k = (<object> values).shape
+    n, k = (<object>values).shape
     assert (n == len(mask))
 
     out = np.empty((mask.sum(), k), dtype=object)
@@ -713,13 +719,12 @@ def row_bool_subset_object(object[:, :] values,
                 out[pos, j] = values[i, j]
             pos += 1
 
-    return out
+    return np.asarray(out)
 
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def get_level_sorter(ndarray[int64_t, ndim=1] label,
-                     ndarray[int64_t, ndim=1] starts):
+def get_level_sorter(ndarray[int64_t, ndim=1] label, int64_t[:] starts):
     """
     argsort for a single level of a multi-index, keeping the order of higher
     levels unchanged. `starts` points to starts of same-key indices w.r.t
@@ -742,7 +747,7 @@ def get_level_sorter(ndarray[int64_t, ndim=1] label,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
-                   ndarray[int64_t, ndim=1] labels,
+                   int64_t[:] labels,
                    Py_ssize_t max_bin,
                    int axis):
     cdef:
@@ -769,7 +774,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
     return counts
 
 
-def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups):
+def generate_slices(int64_t[:] labels, Py_ssize_t ngroups):
     cdef:
         Py_ssize_t i, group_size, n, start
         int64_t lab
@@ -798,7 +803,7 @@ def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups):
     return starts, ends
 
 
-def indices_fast(object index, ndarray[int64_t] labels, list keys,
+def indices_fast(object index, int64_t[:] labels, list keys,
                  list sorted_labels):
     cdef:
         Py_ssize_t i, j, k, lab, cur, start, n = len(labels)
@@ -841,19 +846,19 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys,
 
 # core.common import for fast inference checks
 
-def is_float(obj: object) -> bint:
+def is_float(obj: object) -> bool:
     return util.is_float_object(obj)
 
 
-def is_integer(obj: object) -> bint:
+def is_integer(obj: object) -> bool:
     return util.is_integer_object(obj)
 
 
-def is_bool(obj: object) -> bint:
+def is_bool(obj: object) -> bool:
     return util.is_bool_object(obj)
 
 
-def is_complex(obj: object) -> bint:
+def is_complex(obj: object) -> bool:
     return util.is_complex_object(obj)
 
 
@@ -865,7 +870,7 @@ cpdef bint is_interval(object obj):
     return getattr(obj, '_typ', '_typ') == 'interval'
 
 
-def is_period(val: object) -> bint:
+def is_period(val: object) -> bool:
     """ Return a boolean if this is a Period object """
     return util.is_period_object(val)
 
@@ -1046,7 +1051,7 @@ cdef _try_infer_map(v):
     return None
 
 
-def infer_dtype(object value, bint skipna=False):
+def infer_dtype(value: object, skipna: bint = False) -> str:
     """
     Efficiently infer the type of a passed val, or list-like
     array of values. Return a string describing the type.
@@ -1347,7 +1352,7 @@ def infer_datetimelike_array(arr: object) -> object:
             seen_datetime = 1
         elif PyDate_Check(v):
             seen_date = 1
-        elif is_timedelta(v) or util.is_timedelta64_object(v):
+        elif is_timedelta(v):
             # timedelta, or timedelta64
             seen_timedelta = 1
         else:
@@ -1626,7 +1631,9 @@ cpdef bint is_datetime64_array(ndarray values):
     return validator.validate(values)
 
 
-def is_datetime_with_singletz_array(values: ndarray) -> bint:
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_datetime_with_singletz_array(values: ndarray) -> bool:
     """
     Check values have the same tzinfo attribute.
     Doesn't check values are datetime-like types.
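Note (not part of the patch): the `-> bint` to `-> bool` switch in the inference helpers above is purely at the Python boundary. As a sketch with illustrative names: a `def` function hands back Python objects, so a C-level truth value is boxed to a Python bool on return regardless, making `bool` the honest annotation; `bint` remains the right spelling in C-level declarations:

```cython
# Sketch of the annotation distinction; assumes compilation with Cython.
def is_small(obj: object) -> bool:
    # def functions return Python objects; a C true/false is boxed to a
    # Python bool here, so the annotation names the Python type.
    return isinstance(obj, int) and obj < 10

cdef bint _is_small_c(long x):
    # In cdef/cpdef C signatures, bint is a real C int used as a boolean.
    return x < 10
```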
@@ -1915,7 +1922,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
+def maybe_convert_objects(object[:] objects, bint try_float=0,
                           bint safe=0, bint convert_datetime=0,
                           bint convert_timedelta=0):
     """
@@ -1928,12 +1935,14 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
         ndarray[int64_t] ints
         ndarray[uint64_t] uints
         ndarray[uint8_t] bools
-        ndarray[int64_t] idatetimes
-        ndarray[int64_t] itimedeltas
+        int64_t[:] idatetimes
+        int64_t[:] itimedeltas
         Seen seen = Seen()
         object val
         float64_t fval, fnan
 
+    util.require_not_none(objects)
+
     n = len(objects)
 
     floats = np.empty(n, dtype='f8')
@@ -2042,7 +2051,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
         if seen.datetimetz_:
             if len({getattr(val, 'tzinfo', None) for val in objects}) == 1:
                 from pandas import DatetimeIndex
-                return DatetimeIndex(objects)
+                return DatetimeIndex(np.asarray(objects))
             seen.object_ = 1
 
     if not seen.object_:
@@ -2107,11 +2116,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
         elif seen.is_bool:
             return bools.view(np.bool_)
 
-    return objects
+    return np.asarray(objects)
 
 
-def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask,
-                   bint convert=1):
+def map_infer_mask(ndarray arr, object f, uint8_t[:] mask, bint convert=1):
     """
     Substitute for np.vectorize with pandas-friendly dtype inference
 
@@ -2126,18 +2134,18 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask,
     """
     cdef:
         Py_ssize_t i, n
-        ndarray[object] result
+        object[:] result
         object val
 
     n = len(arr)
     result = np.empty(n, dtype=object)
     for i in range(n):
         if mask[i]:
-            val = util.get_value_at(arr, i)
+            val = arr[i]
         else:
-            val = f(util.get_value_at(arr, i))
+            val = f(arr[i])
 
-        if util.is_array(val) and PyArray_NDIM(val) == 0:
+        if cnp.PyArray_IsZeroDim(val):
             # unbox 0-dim arrays, GH#690
             # TODO: is there a faster way to unbox?
             #   item_from_zerodim?
@@ -2151,9 +2159,11 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask,
                                      convert_datetime=0,
                                      convert_timedelta=0)
 
-    return result
+    return np.asarray(result)
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
 def map_infer(ndarray arr, object f, bint convert=1):
     """
     Substitute for np.vectorize with pandas-friendly dtype inference
 
@@ -2169,15 +2179,15 @@ def map_infer(ndarray arr, object f, bint convert=1):
     """
     cdef:
         Py_ssize_t i, n
-        ndarray[object] result
+        object[:] result
         object val
 
     n = len(arr)
     result = np.empty(n, dtype=object)
     for i in range(n):
-        val = f(util.get_value_at(arr, i))
+        val = f(arr[i])
 
-        if util.is_array(val) and PyArray_NDIM(val) == 0:
+        if cnp.PyArray_IsZeroDim(val):
             # unbox 0-dim arrays, GH#690
             # TODO: is there a faster way to unbox?
             #   item_from_zerodim?
@@ -2191,10 +2201,10 @@ def map_infer(ndarray arr, object f, bint convert=1):
                                      convert_datetime=0,
                                      convert_timedelta=0)
 
-    return result
+    return np.asarray(result)
 
 
-def to_object_array(list rows, int min_width=0):
+def to_object_array(rows: list, min_width: int = 0):
     """
     Convert a list of lists into an object array.
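Note (not part of the patch): most lib.pyx returns above now go through `np.asarray`. A short sketch of the idiom with a hypothetical function: the memoryview used as a scratch buffer shares its memory with the returned ndarray, so the wrap is essentially free:

```cython
# Sketch of the memoryview-in, ndarray-out pattern; assumes the module
# does `import numpy as np` as usual.
import numpy as np

def repeat_value(object val, Py_ssize_t n):
    cdef:
        object[:] result = np.empty(n, dtype=object)
        Py_ssize_t i
    for i in range(n):
        result[i] = val
    # A raw memoryview lacks the ndarray API (.fill, .astype, ...), so
    # wrap it on the way out; np.asarray reuses the buffer, no copy.
    return np.asarray(result)
```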
@@ -2214,7 +2224,7 @@
     """
     cdef:
         Py_ssize_t i, j, n, k, tmp
-        ndarray[object, ndim=2] result
+        object[:, :] result
         list row
 
     n = len(rows)
@@ -2233,13 +2243,13 @@ def to_object_array(rows: list, min_width: int = 0):
         for j in range(len(row)):
             result[i, j] = row[j]
 
-    return result
+    return np.asarray(result)
 
 
 def tuples_to_object_array(ndarray[object] tuples):
     cdef:
         Py_ssize_t i, j, n, k, tmp
-        ndarray[object, ndim=2] result
+        object[:, :] result
         tuple tup
 
     n = len(tuples)
@@ -2250,13 +2260,13 @@ def tuples_to_object_array(ndarray[object] tuples):
             result[i, j] = tup[j]
 
-    return result
+    return np.asarray(result)
 
 
-def to_object_array_tuples(list rows):
+def to_object_array_tuples(rows: list):
     cdef:
         Py_ssize_t i, j, n, k, tmp
-        ndarray[object, ndim=2] result
+        object[:, :] result
         tuple row
 
     n = len(rows)
@@ -2281,14 +2291,16 @@ def to_object_array_tuples(list rows):
         for j in range(len(row)):
             result[i, j] = row[j]
 
-    return result
+    return np.asarray(result)
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
 def fast_multiget(dict mapping, ndarray keys, default=np.nan):
     cdef:
         Py_ssize_t i, n = len(keys)
         object val
-        ndarray[object] output = np.empty(n, dtype='O')
+        object[:] output = np.empty(n, dtype='O')
 
     if n == 0:
         # kludge, for Series
diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
index 0ba61fcc58f46..8cb313513f7b7 100644
--- a/pandas/_libs/tslibs/util.pxd
+++ b/pandas/_libs/tslibs/util.pxd
@@ -51,6 +51,15 @@ cdef inline int import_array() except -1:
     _import_array()
 
 
+cdef inline require_not_none(obj):
+    """
+    Functions accepting cython memoryviews will also accept None. In order to
+    avoid silently returning incorrect answers, we explicitly check for None.
+    """
+    if obj is None:
+        raise TypeError("An array or memoryview is required, not None.")
+
+
 # --------------------------------------------------------------------
 # Type Checking
 
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
index c7cfaab60b606..369784fc53338 100644
--- a/pandas/_libs/window.pyx
+++ b/pandas/_libs/window.pyx
@@ -241,7 +241,7 @@ cdef class VariableWindowIndexer(WindowIndexer):
         # max window size
         self.win = (self.end - self.start).max()
 
-    def build(self, ndarray[int64_t] index, int64_t win, bint left_closed,
+    def build(self, int64_t[:] index, int64_t win, bint left_closed,
               bint right_closed):
 
         cdef:
@@ -249,6 +249,8 @@ cdef class VariableWindowIndexer(WindowIndexer):
            int64_t start_bound, end_bound, N
             Py_ssize_t i, j
 
+        util.require_not_none(index)
+
         start = self.start
         end = self.end
         N = self.N
@@ -1449,8 +1451,8 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win,
     try:
         interpolation_type = interpolation_types[interpolation]
     except KeyError:
-        raise ValueError("Interpolation '{}' is not supported"
-                         .format(interpolation))
+        raise ValueError("Interpolation '{interp}' is not supported"
+                         .format(interp=interpolation))
 
     # we use the Fixed/Variable Indexer here as the
     # actual skiplist ops outweigh any window computation costs
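Closing note (not part of the patch): the other recurring substitution, `out.fill(x)` becoming `out[:] = x` in groupby_helper.pxi.in, follows from the same memoryview limitation. A hedged sketch with example names only: assigning a scalar to a full slice broadcasts it across every element, in one or two dimensions alike:

```cython
# Sketch of slice assignment as the memoryview replacement for .fill().
import numpy as np
from numpy cimport float64_t

def zero_out(float64_t[:, :] out):
    if out is None:
        raise TypeError("An array or memoryview is required, not None.")
    out[:, :] = 0.0   # memoryview equivalent of out.fill(0.0)
```

Called with an ndarray, e.g. `zero_out(np.ones((2, 3)))`, the array coerces to a `float64_t[:, :]` view and is zeroed in place.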