From 613f764db4c3984aaac5d6bc95c21fc9a82d639a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 11 Aug 2018 14:25:40 -0700 Subject: [PATCH 1/4] misc cython cleanup --- pandas/_libs/algos.pxd | 7 +- pandas/_libs/algos.pyx | 2 +- pandas/_libs/algos_common_helper.pxi.in | 59 ++++++++------- pandas/_libs/groupby.pyx | 9 ++- pandas/_libs/hashing.pyx | 16 ++-- pandas/_libs/hashtable_class_helper.pxi.in | 14 +++- pandas/_libs/hashtable_func_helper.pxi.in | 1 - pandas/_libs/internals.pyx | 7 +- pandas/_libs/lib.pyx | 85 +++++++++------------- pandas/_libs/reduction.pyx | 2 +- pandas/_libs/tslib.pyx | 22 +++--- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 4 +- pandas/_libs/window.pyx | 73 ++++++++++--------- 14 files changed, 157 insertions(+), 146 deletions(-) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index a535872ff7279..0888cf3c85f2f 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -1,10 +1,12 @@ from util cimport numeric -from numpy cimport float64_t, double_t + cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil + cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: - cdef numeric t + cdef: + numeric t # cython doesn't allow pointer dereference so use array syntax t = a[0] @@ -12,6 +14,7 @@ cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: b[0] = t return 0 + cdef enum TiebreakEnumType: TIEBREAK_AVERAGE TIEBREAK_MIN, diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 124792638e3df..30279615758e0 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -45,7 +45,7 @@ tiebreakers = { } -cdef inline are_diff(object left, object right): +cdef inline bint are_diff(object left, object right): try: return fabs(left - right) > FP_ERR except TypeError: diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 97b7196da80bb..42dda15ea2cbb 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -68,12 +68,12 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): @cython.boundscheck(False) @cython.wraparound(False) -def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, - limit=None): - cdef Py_ssize_t i, j, nleft, nright - cdef ndarray[int64_t, ndim=1] indexer - cdef {{c_type}} cur, next - cdef int lim, fill_count = 0 +def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None): + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[int64_t, ndim=1] indexer + {{c_type}} cur, next + int lim, fill_count = 0 nleft = len(old) nright = len(new) @@ -135,9 +135,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, def pad_inplace_{{name}}(ndarray[{{c_type}}] values, ndarray[uint8_t, cast=True] mask, limit=None): - cdef Py_ssize_t i, N - cdef {{c_type}} val - cdef int lim, fill_count = 0 + cdef: + Py_ssize_t i, N + {{c_type}} val + int lim, fill_count = 0 N = len(values) @@ -171,9 +172,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values, def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, ndarray[uint8_t, ndim=2] mask, limit=None): - cdef Py_ssize_t i, j, N, K - cdef {{c_type}} val - cdef int lim, fill_count = 0 + cdef: + Py_ssize_t i, j, N, K + {{c_type}} val + int lim, fill_count = 0 K, N = ( values).shape @@ -233,10 +235,11 @@ D @cython.wraparound(False) def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None): - cdef Py_ssize_t i, j, nleft, nright - cdef ndarray[int64_t, ndim=1] indexer - cdef {{c_type}} 
cur, prev - cdef int lim, fill_count = 0 + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[int64_t, ndim=1] indexer + {{c_type}} cur, prev + int lim, fill_count = 0 nleft = len(old) nright = len(new) @@ -299,9 +302,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, def backfill_inplace_{{name}}(ndarray[{{c_type}}] values, ndarray[uint8_t, cast=True] mask, limit=None): - cdef Py_ssize_t i, N - cdef {{c_type}} val - cdef int lim, fill_count = 0 + cdef: + Py_ssize_t i, N + {{c_type}} val + int lim, fill_count = 0 N = len(values) @@ -335,9 +339,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values, def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, ndarray[uint8_t, ndim=2] mask, limit=None): - cdef Py_ssize_t i, j, N, K - cdef {{c_type}} val - cdef int lim, fill_count = 0 + cdef: + Py_ssize_t i, j, N, K + {{c_type}} val + int lim, fill_count = 0 K, N = ( values).shape @@ -428,10 +433,10 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike): @cython.wraparound(False) @cython.boundscheck(False) def arrmap_{{name}}(ndarray[{{c_type}}] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) + cdef: + Py_ssize_t length = index.shape[0] + Py_ssize_t i = 0 + ndarray[object] result = np.empty(length, dtype=np.object_) from pandas._libs.lib import maybe_convert_objects @@ -535,6 +540,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, cdef int PLATFORM_INT = ( np.arange(0, dtype=np.intp)).descr.type_num + cpdef ensure_platform_int(object arr): # GH3033, GH1392 # platform int is the size of the int pointer, e.g. np.intp @@ -546,6 +552,7 @@ cpdef ensure_platform_int(object arr): else: return np.array(arr, dtype=np.intp) + cpdef ensure_object(object arr): if util.is_array(arr): if ( arr).descr.type_num == NPY_OBJECT: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 5681d01c6bb25..5ffd3fc45460e 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -28,9 +28,10 @@ cdef double nan = NaN cdef inline float64_t median_linear(float64_t* a, int n) nogil: - cdef int i, j, na_count = 0 - cdef float64_t result - cdef float64_t* tmp + cdef: + int i, j, na_count = 0 + float64_t result + float64_t* tmp if n == 0: return NaN @@ -319,7 +320,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, # If we move to the next group, reset # the fill_idx and counter - if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]: + if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]: curr_fill_idx = -1 filled_vals = 0 diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index a9775d3950187..a758be6557206 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -48,9 +48,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): k = key.encode(encoding) kb = k if len(k) != 16: - raise ValueError( - 'key should be a 16-byte string encoded, got {!r} (len {})'.format( - k, len(k))) + raise ValueError("key should be a 16-byte string encoded, " + "got {key} (len {klen})".format(key=k, klen=len(k))) n = len(arr) @@ -70,8 +69,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): data = str(val).encode(encoding) else: - raise TypeError("{} of type {} is not a valid type for hashing, " - "must be string or null".format(val, type(val))) + raise TypeError("{val} of type {typ} is not a valid type " + "for 
hashing, must be string or null" + .format(val=val, typ=type(val))) l = len(data) lens[i] = l @@ -134,9 +134,9 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, cpdef uint64_t siphash(bytes data, bytes key) except? 0: if len(key) != 16: - raise ValueError( - 'key should be a 16-byte bytestring, got {!r} (len {})'.format( - key, len(key))) + raise ValueError("key should be a 16-byte bytestring, " + "got {key} (len {klen})" + .format(key=key, klen=len(key))) return low_level_siphash(data, len(data), key) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 7f4a2eeafeea2..550cabd5e3192 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -115,7 +115,8 @@ cdef class {{name}}Vector: if needs_resize(self.data): if self.external_view_exists: - raise ValueError("external reference but Vector.resize() needed") + raise ValueError("external reference but " + "Vector.resize() needed") self.resize() append_data_{{dtype}}(self.data, x) @@ -194,6 +195,7 @@ cdef class StringVector: for i in range(len(x)): self.append(x[i]) + cdef class ObjectVector: cdef: @@ -215,7 +217,8 @@ cdef class ObjectVector: cdef inline append(self, object o): if self.n == self.m: if self.external_view_exists: - raise ValueError("external reference but Vector.resize() needed") + raise ValueError("external reference but " + "Vector.resize() needed") self.m = max(self.m * 2, _INIT_VEC_CAP) self.ao.resize(self.m, refcheck=False) self.data = self.ao.data @@ -405,8 +408,9 @@ cdef class {{name}}HashTable(HashTable): if needs_resize(ud): with gil: if uniques.external_view_exists: - raise ValueError("external reference to uniques held, " - "but Vector.resize() needed") + raise ValueError("external reference to " + "uniques held, but " + "Vector.resize() needed") uniques.resize() append_data_{{dtype}}(ud, val) labels[i] = count @@ -742,8 +746,10 @@ cdef class StringHashTable(HashTable): return np.asarray(labels) + na_sentinel = object + cdef class PyObjectHashTable(HashTable): def __init__(self, size_hint=1): diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index f5cd8d05650a8..45a69b613f698 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -273,7 +273,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values): {{endfor}} - #---------------------------------------------------------------------- # Mode Computations #---------------------------------------------------------------------- diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 97cc7f96cb24f..328a503493b8c 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -3,7 +3,6 @@ cimport cython from cython cimport Py_ssize_t -from cpython cimport PyObject from cpython.slice cimport PySlice_Check cdef extern from "Python.h": @@ -13,7 +12,7 @@ import numpy as np from numpy cimport int64_t cdef extern from "compat_helper.h": - cdef int slice_get_indices(PyObject* s, Py_ssize_t length, + cdef int slice_get_indices(object s, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, Py_ssize_t *slicelength) except -1 @@ -249,7 +248,7 @@ cpdef Py_ssize_t slice_len( if slc is None: raise TypeError("slc must be slice") - slice_get_indices(slc, objlen, + slice_get_indices(slc, objlen, &start, &stop, &step, &length) return length @@ -269,7 +268,7 @@ cpdef slice_get_indices_ex(slice slc, Py_ssize_t 
objlen=PY_SSIZE_T_MAX): if slc is None: raise TypeError("slc should be a slice") - slice_get_indices(slc, objlen, + slice_get_indices(slc, objlen, &start, &stop, &step, &length) return start, stop, step, length diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e05905ab63624..6e84bea98091b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -22,7 +22,7 @@ PyDateTime_IMPORT import numpy as np cimport numpy as cnp -from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM, +from numpy cimport (ndarray, PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew, flatiter, NPY_OBJECT, int64_t, @@ -137,10 +137,10 @@ cpdef bint is_scalar(object val): or util.is_period_object(val) or is_decimal(val) or is_interval(val) - or is_offset(val)) + or util.is_offset_object(val)) -def item_from_zerodim(object val): +cpdef item_from_zerodim(object val): """ If the value is a zerodim array, return the item it contains. @@ -173,7 +173,7 @@ def item_from_zerodim(object val): @cython.boundscheck(False) def fast_unique_multiple(list arrays): cdef: - ndarray[object] buf + object[:] buf Py_ssize_t k = len(arrays) Py_ssize_t i, j, n list uniques = [] @@ -269,7 +269,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True): def dicts_to_array(list dicts, list columns): cdef: Py_ssize_t i, j, k, n - ndarray[object, ndim=2] result + object[:, :] result dict row object col, onan = np.nan @@ -287,7 +287,7 @@ def dicts_to_array(list dicts, list columns): else: result[i, j] = onan - return result + return result.base # `.base` to access underlying np.ndarray def fast_zip(list ndarrays): @@ -296,7 +296,7 @@ def fast_zip(list ndarrays): """ cdef: Py_ssize_t i, j, k, n - ndarray[object] result + object[:] result flatiter it object val, tup @@ -329,10 +329,10 @@ def fast_zip(list ndarrays): Py_INCREF(val) PyArray_ITER_NEXT(it) - return result + return result.base # `.base` to access underlying np.ndarray -def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): +def get_reverse_indexer(int64_t[:] indexer, Py_ssize_t length): """ Reverse indexing operation. @@ -359,7 +359,7 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): return rev_indexer -def has_infs_f4(ndarray[float32_t] arr): +def has_infs_f4(float32_t[:] arr): cdef: Py_ssize_t i, n = len(arr) float32_t inf, neginf, val @@ -374,7 +374,7 @@ def has_infs_f4(ndarray[float32_t] arr): return False -def has_infs_f8(ndarray[float64_t] arr): +def has_infs_f8(float64_t[:] arr): cdef: Py_ssize_t i, n = len(arr) float64_t inf, neginf, val @@ -476,7 +476,7 @@ cpdef bint array_equivalent_object(object[:] left, object[:] right): return True -def astype_intsafe(ndarray[object] arr, new_dtype): +def astype_intsafe(object[:] arr, new_dtype): cdef: Py_ssize_t i, n = len(arr) object v @@ -545,8 +545,7 @@ def clean_index_list(list obj): # don't force numpy coerce with nan's inferred = infer_dtype(obj) - if inferred in ['string', 'bytes', 'unicode', - 'mixed', 'mixed-integer']: + if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']: return np.asarray(obj, dtype=object), 0 elif inferred in ['integer']: @@ -567,7 +566,7 @@ def clean_index_list(list obj): # is a general, O(max(len(values), len(binner))) method. 
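[Editor's note] The dominant rewrite in these lib.pyx hunks is switching `ndarray[dtype]` buffer arguments to typed memoryviews, with `.base` used to hand the caller back a real ndarray. A minimal sketch of the pattern, using a hypothetical helper that is not a pandas function:

    # Illustrative only: the buffer-to-memoryview pattern these hunks apply.
    import numpy as np
    from numpy cimport int64_t

    def doubled(int64_t[:] values):
        # memoryview argument: accepts any 1-D int64 buffer
        cdef:
            Py_ssize_t i, n = len(values)
            int64_t[:] out = np.empty(n, dtype=np.int64)

        for i in range(n):
            out[i] = 2 * values[i]

        # a memoryview is not an ndarray; return the underlying array
        # (this patch uses `.base`; patch 2 below switches to np.asarray)
        return out.base
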
@cython.boundscheck(False) @cython.wraparound(False) -def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, +def generate_bins_dt64(ndarray[int64_t] values, int64_t[:] binner, object closed='left', bint hasnans=0): """ Int64 (datetime64) version of generic python version in groupby.py @@ -634,7 +633,7 @@ def row_bool_subset(ndarray[float64_t, ndim=2] values, ndarray[uint8_t, cast=True] mask): cdef: Py_ssize_t i, j, n, k, pos = 0 - ndarray[float64_t, ndim=2] out + float64_t[:, :] out n, k = ( values).shape assert (n == len(mask)) @@ -647,7 +646,7 @@ def row_bool_subset(ndarray[float64_t, ndim=2] values, out[pos, j] = values[i, j] pos += 1 - return out + return out.base # `.base` to access underlying np.ndarray @cython.boundscheck(False) @@ -656,7 +655,7 @@ def row_bool_subset_object(ndarray[object, ndim=2] values, ndarray[uint8_t, cast=True] mask): cdef: Py_ssize_t i, j, n, k, pos = 0 - ndarray[object, ndim=2] out + object[:, :] out n, k = ( values).shape assert (n == len(mask)) @@ -669,13 +668,12 @@ def row_bool_subset_object(ndarray[object, ndim=2] values, out[pos, j] = values[i, j] pos += 1 - return out + return out.base # `.base` to access underlying np.ndarray @cython.boundscheck(False) @cython.wraparound(False) -def get_level_sorter(ndarray[int64_t, ndim=1] label, - ndarray[int64_t, ndim=1] starts): +def get_level_sorter(ndarray[int64_t, ndim=1] label, int64_t[:] starts): """ argsort for a single level of a multi-index, keeping the order of higher levels unchanged. `starts` points to starts of same-key indices w.r.t @@ -698,7 +696,7 @@ def get_level_sorter(ndarray[int64_t, ndim=1] label, @cython.boundscheck(False) @cython.wraparound(False) def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, - ndarray[int64_t, ndim=1] labels, + int64_t[:] labels, Py_ssize_t max_bin, int axis): cdef: @@ -725,7 +723,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, return counts -def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): +def generate_slices(int64_t[:] labels, Py_ssize_t ngroups): cdef: Py_ssize_t i, group_size, n, start int64_t lab @@ -754,7 +752,7 @@ def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): return starts, ends -def indices_fast(object index, ndarray[int64_t] labels, list keys, +def indices_fast(object index, int64_t[:] labels, list keys, list sorted_labels): cdef: Py_ssize_t i, j, k, lab, cur, start, n = len(labels) @@ -826,10 +824,6 @@ cpdef bint is_period(object val): return util.is_period_object(val) -cdef inline bint is_offset(object val): - return getattr(val, '_typ', '_typ') == 'dateoffset' - - _TYPE_MAP = { 'categorical': 'categorical', 'category': 'categorical', @@ -1231,7 +1225,7 @@ def infer_dtype(object value, bint skipna=False): if is_bytes_array(values, skipna=skipna): return 'bytes' - elif is_period(val): + elif util.is_period_object(val): if is_period_array(values): return 'period' @@ -1678,7 +1672,7 @@ cpdef bint is_time_array(ndarray values, bint skipna=False): cdef class PeriodValidator(TemporalValidator): cdef inline bint is_value_typed(self, object value) except -1: - return is_period(value) + return util.is_period_object(value) cdef inline bint is_valid_null(self, object value) except -1: return is_null_period(value) @@ -2068,8 +2062,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return objects -def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, - bint convert=1): +def map_infer_mask(ndarray arr, object f, uint8_t[:] mask, bint convert=1): """ 
Substitute for np.vectorize with pandas-friendly dtype inference @@ -2095,11 +2088,8 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, else: val = f(util.get_value_at(arr, i)) - if util.is_array(val) and PyArray_NDIM(val) == 0: - # unbox 0-dim arrays, GH#690 - # TODO: is there a faster way to unbox? - # item_from_zerodim? - val = val.item() + # unbox if 0-dim array GH#690 + val = item_from_zerodim(val) result[i] = val @@ -2135,11 +2125,8 @@ def map_infer(ndarray arr, object f, bint convert=1): for i in range(n): val = f(util.get_value_at(arr, i)) - if util.is_array(val) and PyArray_NDIM(val) == 0: - # unbox 0-dim arrays, GH#690 - # TODO: is there a faster way to unbox? - # item_from_zerodim? - val = val.item() + # unbox if 0-dim array GH#690 + val = item_from_zerodim(val) result[i] = val @@ -2172,7 +2159,7 @@ def to_object_array(list rows, int min_width=0): """ cdef: Py_ssize_t i, j, n, k, tmp - ndarray[object, ndim=2] result + object[:, :] result list row n = len(rows) @@ -2191,13 +2178,13 @@ def to_object_array(list rows, int min_width=0): for j in range(len(row)): result[i, j] = row[j] - return result + return result.base # `.base` to access underlying np.ndarray -def tuples_to_object_array(ndarray[object] tuples): +def tuples_to_object_array(object[:] tuples): cdef: Py_ssize_t i, j, n, k, tmp - ndarray[object, ndim=2] result + object[:, :] result tuple tup n = len(tuples) @@ -2208,13 +2195,13 @@ def tuples_to_object_array(ndarray[object] tuples): for j in range(k): result[i, j] = tup[j] - return result + return result.base # `.base` to access underlying np.ndarray def to_object_array_tuples(list rows): cdef: Py_ssize_t i, j, n, k, tmp - ndarray[object, ndim=2] result + object[:, :] result tuple row n = len(rows) @@ -2239,7 +2226,7 @@ def to_object_array_tuples(list rows): for j in range(len(row)): result[i, j] = row[j] - return result + return result.base # `.base` to access underlying np.ndarray def fast_multiget(dict mapping, ndarray keys, default=np.nan): diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 3588ac14c87d1..59aca2a72a7ae 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -25,7 +25,7 @@ is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2' cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt): if (util.is_array(obj) or - isinstance(obj, list) and len(obj) == cnt or + (isinstance(obj, list) and len(obj) == cnt) or getattr(obj, 'shape', None) == (cnt,)): raise ValueError('function does not reduce') diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 04e039a9fc2c9..9e2e0b2fddce1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -62,12 +62,14 @@ cdef inline object create_datetime_from_ts( return datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) + cdef inline object create_date_from_ts( int64_t value, npy_datetimestruct dts, object tz, object freq): """ convenience routine to construct a datetime.date from its parts """ return date(dts.year, dts.month, dts.day) + cdef inline object create_time_from_ts( int64_t value, npy_datetimestruct dts, object tz, object freq): @@ -351,8 +353,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): if ((fvalues < _NS_LOWER_BOUND).any() or (fvalues > _NS_UPPER_BOUND).any()): - raise OutOfBoundsDatetime( - "cannot convert input with unit '{0}'".format(unit)) + raise OutOfBoundsDatetime("cannot convert input with unit " + "'{unit}'".format(unit=unit)) result = (iresult * 
m).astype('M8[ns]') iresult = result.view('i8') iresult[mask] = iNaT @@ -378,8 +380,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): except OverflowError: if is_raise: raise OutOfBoundsDatetime( - "cannot convert input {0} with the unit " - "'{1}'".format(val, unit)) + "cannot convert input {val} with the unit " + "'{unit}'".format(val=val, unit=unit)) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -394,16 +396,16 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): except ValueError: if is_raise: raise ValueError( - "non convertible value {0} with the unit " - "'{1}'".format(val, unit)) + "non convertible value {val} with the unit " + "'{unit}'".format(val=val, unit=unit)) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT except: if is_raise: raise OutOfBoundsDatetime( - "cannot convert input {0} with the unit " - "'{1}'".format(val, unit)) + "cannot convert input {val} with the unit " + "'{unit}'".format(val=val, unit=unit)) elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -696,8 +698,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if is_coerce: iresult[i] = NPY_NAT else: - raise TypeError("{0} is not convertible to datetime" - .format(type(val))) + raise TypeError("{typ} is not convertible to datetime" + .format(typ=type(val))) if seen_datetime and seen_integer: # we have mixed datetimes & integers diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 67353f3eec614..420d4ddd6eedd 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -11,7 +11,7 @@ cpdef bint tz_compare(object start, object end) cpdef object get_timezone(object tz) cpdef object maybe_get_tz(object tz) -cpdef get_utcoffset(tzinfo, obj) +cdef get_utcoffset(tzinfo, obj) cdef bint is_fixed_offset(object tz) cdef object get_dst_info(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index a787452d90c07..dadf44d3c289d 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -150,7 +150,7 @@ cdef inline object tz_cache_key(object tz): # UTC Offsets -cpdef get_utcoffset(tzinfo, obj): +cdef get_utcoffset(tzinfo, obj): try: return tzinfo._utcoffset except AttributeError: @@ -187,7 +187,7 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -cpdef int64_t[:] unbox_utcoffsets(object transinfo): +cdef int64_t[:] unbox_utcoffsets(object transinfo): cdef: Py_ssize_t i, sz int64_t[:] arr diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index efc8a02014bc0..29bc06edc7ea6 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -609,12 +609,12 @@ def roll_mean(ndarray[double_t] input, int64_t win, int64_t minp, else: with nogil: - for i from 0 <= i < minp - 1: + for i in range(minp - 1): val = input[i] add_mean(val, &nobs, &sum_x, &neg_ct) output[i] = NaN - for i from minp - 1 <= i < N: + for i in range(minp - 1, N): val = input[i] add_mean(val, &nobs, &sum_x, &neg_ct) @@ -747,7 +747,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, # Over the first window, observations can only be added, never # removed - for i from 0 <= i < win: + for i in range(win): add_var(input[i], &nobs, &mean_x, &ssqdm_x) output[i] = calc_var(minp, ddof, nobs, ssqdm_x) @@ -756,7 +756,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, # After the first window, observations can both be added and # removed - for i from win 
<= i < N: + for i in range(win, N): val = input[i] prev = input[i - win] @@ -816,6 +816,7 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, return result + cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, double *xxx) nogil: """ add a value from the skew calc """ @@ -829,6 +830,7 @@ cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, xx[0] = xx[0] + val * val xxx[0] = xxx[0] + val * val * val + cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, double *xxx) nogil: """ remove a value from the skew calc """ @@ -896,12 +898,12 @@ def roll_skew(ndarray[double_t] input, int64_t win, int64_t minp, else: with nogil: - for i from 0 <= i < minp - 1: + for i in range(minp - 1): val = input[i] add_skew(val, &nobs, &x, &xx, &xxx) output[i] = NaN - for i from minp - 1 <= i < N: + for i in range(minp - 1, N): val = input[i] add_skew(val, &nobs, &x, &xx, &xxx) @@ -951,6 +953,7 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, return result + cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, double *xxx, double *xxxx) nogil: """ add a value from the kurotic calc """ @@ -965,6 +968,7 @@ cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] + val * val * val xxxx[0] = xxxx[0] + val * val * val * val + cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, double *xxx, double *xxxx) nogil: """ remove a value from the kurotic calc """ @@ -1031,11 +1035,11 @@ def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, with nogil: - for i from 0 <= i < minp - 1: + for i in range(minp - 1): add_kurt(input[i], &nobs, &x, &xx, &xxx, &xxxx) output[i] = NaN - for i from minp - 1 <= i < N: + for i in range(minp - 1, N): add_kurt(input[i], &nobs, &x, &xx, &xxx, &xxxx) if i > win - 1: @@ -1589,7 +1593,7 @@ def roll_generic(object obj, elif not raw: # series - for i from 0 <= i < N: + for i in range(N): if counts[i] >= minp: sl = slice(int_max(i + offset - win + 1, 0), int_min(i + offset + 1, N)) @@ -1652,7 +1656,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input, minp = _check_minp(len(weights), minp, in_n) if avg: - for win_i from 0 <= win_i < win_n: + for win_i in range(win_n): val_win = weights[win_i] if val_win != val_win: continue @@ -1664,7 +1668,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input, counts[in_i + (win_n - win_i) - 1] += 1 tot_wgt[in_i + (win_n - win_i) - 1] += val_win - for in_i from 0 <= in_i < in_n: + for in_i in range(in_n): c = counts[in_i] if c < minp: output[in_i] = NaN @@ -1676,7 +1680,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input, output[in_i] /= tot_wgt[in_i] else: - for win_i from 0 <= win_i < win_n: + for win_i in range(win_n): val_win = weights[win_i] if val_win != val_win: continue @@ -1688,7 +1692,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input, output[in_i + (win_n - win_i) - 1] += val_in * val_win counts[in_i + (win_n - win_i) - 1] += 1 - for in_i from 0 <= in_i < in_n: + for in_i in range(in_n): c = counts[in_i] if c < minp: output[in_i] = NaN @@ -1699,14 +1703,13 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] input, # Exponentially weighted moving average -def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na, - int minp): +def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): """ Compute exponentially-weighted moving average using 
center-of-mass. Parameters ---------- - input : ndarray (float64 type) + vals : ndarray (float64 type) com : float64 adjust: int ignore_na: int @@ -1717,28 +1720,29 @@ def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na, y : ndarray """ - cdef Py_ssize_t N = len(input) - cdef ndarray[double_t] output = np.empty(N, dtype=float) + cdef: + Py_ssize_t N = len(vals) + ndarray[double_t] output = np.empty(N, dtype=float) + double alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + Py_ssize_t i, nobs + if N == 0: return output minp = max(minp, 1) - cdef double alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur - cdef Py_ssize_t i, nobs - alpha = 1. / (1. + com) old_wt_factor = 1. - alpha new_wt = 1. if adjust else alpha - weighted_avg = input[0] + weighted_avg = vals[0] is_observation = (weighted_avg == weighted_avg) nobs = int(is_observation) output[0] = weighted_avg if (nobs >= minp) else NaN old_wt = 1. - for i from 1 <= i < N: - cur = input[i] + for i in range(1, N): + cur = vals[i] is_observation = (cur == cur) nobs += int(is_observation) if weighted_avg == weighted_avg: @@ -1767,7 +1771,7 @@ def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na, # Exponentially weighted moving covariance -def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, +def ewmcov(double_t[:] input_x, double_t[:] input_y, double_t com, int adjust, int ignore_na, int minp, int bias): """ Compute exponentially-weighted moving variance using center-of-mass. @@ -1787,20 +1791,23 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, y : ndarray """ - cdef Py_ssize_t N = len(input_x) + cdef: + Py_ssize_t N = len(input_x) + double alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + double sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + Py_ssize_t i, nobs + ndarray[double_t] output + if len(input_y) != N: raise ValueError("arrays are of different lengths " - "(%d and %d)" % (N, len(input_y))) - cdef ndarray[double_t] output = np.empty(N, dtype=float) + "({N} and {len_y})".format(N=N, len_y=len(input_y))) + + output = np.empty(N, dtype=float) if N == 0: return output minp = max(minp, 1) - cdef double alpha, old_wt_factor, new_wt, mean_x, mean_y, cov - cdef double sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y - cdef Py_ssize_t i, nobs - alpha = 1. / (1. + com) old_wt_factor = 1. - alpha new_wt = 1. if adjust else alpha @@ -1818,7 +1825,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, sum_wt2 = 1. old_wt = 1. 
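[Editor's note] For orientation, the loops these window.pyx hunks modernize implement the standard exponentially-weighted recurrence. A rough sketch of the adjusted mean (NaN handling, minp, and the ignore_na/unadjusted branches omitted, so this is not a drop-in for ewma):

    def ewma_sketch(vals, com):
        # adjusted EWMA: each new observation enters with weight 1,
        # accumulated history decays by (1 - alpha) per step
        alpha = 1. / (1. + com)
        old_wt_factor = 1. - alpha
        weighted_avg, old_wt = vals[0], 1.
        out = [weighted_avg]
        for cur in vals[1:]:
            old_wt *= old_wt_factor
            weighted_avg = (old_wt * weighted_avg + cur) / (old_wt + 1.)
            old_wt += 1.
            out.append(weighted_avg)
        return out
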
- for i from 1 <= i < N: + for i in range(1, N): cur_x = input_x[i] cur_y = input_y[i] is_observation = ((cur_x == cur_x) and (cur_y == cur_y)) From b9676bfcdf87f4e9d3801195e3d098e818a837c7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 13 Aug 2018 17:29:04 -0700 Subject: [PATCH 2/4] use np.asarray instead of .base --- pandas/_libs/lib.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6e84bea98091b..ebf5541bdfb80 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -287,7 +287,7 @@ def dicts_to_array(list dicts, list columns): else: result[i, j] = onan - return result.base # `.base` to access underlying np.ndarray + return np.asarray(result) def fast_zip(list ndarrays): @@ -329,7 +329,7 @@ def fast_zip(list ndarrays): Py_INCREF(val) PyArray_ITER_NEXT(it) - return result.base # `.base` to access underlying np.ndarray + return np.asarray(result) def get_reverse_indexer(int64_t[:] indexer, Py_ssize_t length): @@ -646,7 +646,7 @@ def row_bool_subset(ndarray[float64_t, ndim=2] values, out[pos, j] = values[i, j] pos += 1 - return out.base # `.base` to access underlying np.ndarray + return np.asarray(out) @cython.boundscheck(False) @@ -668,7 +668,7 @@ def row_bool_subset_object(ndarray[object, ndim=2] values, out[pos, j] = values[i, j] pos += 1 - return out.base # `.base` to access underlying np.ndarray + return np.asarray(out) @cython.boundscheck(False) @@ -2178,7 +2178,7 @@ def to_object_array(list rows, int min_width=0): for j in range(len(row)): result[i, j] = row[j] - return result.base # `.base` to access underlying np.ndarray + return np.asarray(result) def tuples_to_object_array(object[:] tuples): @@ -2195,7 +2195,7 @@ def tuples_to_object_array(object[:] tuples): for j in range(k): result[i, j] = tup[j] - return result.base # `.base` to access underlying np.ndarray + return np.asarray(result) def to_object_array_tuples(list rows): @@ -2226,7 +2226,7 @@ def to_object_array_tuples(list rows): for j in range(len(row)): result[i, j] = row[j] - return result.base # `.base` to access underlying np.ndarray + return np.asarray(result) def fast_multiget(dict mapping, ndarray keys, default=np.nan): From c09f234f58438ab5d9cadb9a1d27bc84215de3f4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 15 Aug 2018 11:43:58 -0700 Subject: [PATCH 3/4] revert to troubleshoot travis --- pandas/_libs/internals.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 328a503493b8c..97cc7f96cb24f 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -3,6 +3,7 @@ cimport cython from cython cimport Py_ssize_t +from cpython cimport PyObject from cpython.slice cimport PySlice_Check cdef extern from "Python.h": @@ -12,7 +13,7 @@ import numpy as np from numpy cimport int64_t cdef extern from "compat_helper.h": - cdef int slice_get_indices(object s, Py_ssize_t length, + cdef int slice_get_indices(PyObject* s, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, Py_ssize_t *slicelength) except -1 @@ -248,7 +249,7 @@ cpdef Py_ssize_t slice_len( if slc is None: raise TypeError("slc must be slice") - slice_get_indices(slc, objlen, + slice_get_indices(slc, objlen, &start, &stop, &step, &length) return length @@ -268,7 +269,7 @@ cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): if slc is None: raise TypeError("slc should be a slice") - slice_get_indices(slc, objlen, + 
slice_get_indices(slc, objlen, &start, &stop, &step, &length) return start, stop, step, length From bcff47aed80012247d0047906b4396dc842f52c7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 15 Aug 2018 13:18:47 -0700 Subject: [PATCH 4/4] Revert to troubleshoot travis --- pandas/_libs/lib.pyx | 85 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index ebf5541bdfb80..e05905ab63624 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -22,7 +22,7 @@ PyDateTime_IMPORT import numpy as np cimport numpy as cnp -from numpy cimport (ndarray, PyArray_GETITEM, +from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew, flatiter, NPY_OBJECT, int64_t, @@ -137,10 +137,10 @@ cpdef bint is_scalar(object val): or util.is_period_object(val) or is_decimal(val) or is_interval(val) - or util.is_offset_object(val)) + or is_offset(val)) -cpdef item_from_zerodim(object val): +def item_from_zerodim(object val): """ If the value is a zerodim array, return the item it contains. @@ -173,7 +173,7 @@ cpdef item_from_zerodim(object val): @cython.boundscheck(False) def fast_unique_multiple(list arrays): cdef: - object[:] buf + ndarray[object] buf Py_ssize_t k = len(arrays) Py_ssize_t i, j, n list uniques = [] @@ -269,7 +269,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True): def dicts_to_array(list dicts, list columns): cdef: Py_ssize_t i, j, k, n - object[:, :] result + ndarray[object, ndim=2] result dict row object col, onan = np.nan @@ -287,7 +287,7 @@ def dicts_to_array(list dicts, list columns): else: result[i, j] = onan - return np.asarray(result) + return result def fast_zip(list ndarrays): @@ -296,7 +296,7 @@ def fast_zip(list ndarrays): """ cdef: Py_ssize_t i, j, k, n - object[:] result + ndarray[object] result flatiter it object val, tup @@ -329,10 +329,10 @@ def fast_zip(list ndarrays): Py_INCREF(val) PyArray_ITER_NEXT(it) - return np.asarray(result) + return result -def get_reverse_indexer(int64_t[:] indexer, Py_ssize_t length): +def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): """ Reverse indexing operation. @@ -359,7 +359,7 @@ def get_reverse_indexer(int64_t[:] indexer, Py_ssize_t length): return rev_indexer -def has_infs_f4(float32_t[:] arr): +def has_infs_f4(ndarray[float32_t] arr): cdef: Py_ssize_t i, n = len(arr) float32_t inf, neginf, val @@ -374,7 +374,7 @@ def has_infs_f4(float32_t[:] arr): return False -def has_infs_f8(float64_t[:] arr): +def has_infs_f8(ndarray[float64_t] arr): cdef: Py_ssize_t i, n = len(arr) float64_t inf, neginf, val @@ -476,7 +476,7 @@ cpdef bint array_equivalent_object(object[:] left, object[:] right): return True -def astype_intsafe(object[:] arr, new_dtype): +def astype_intsafe(ndarray[object] arr, new_dtype): cdef: Py_ssize_t i, n = len(arr) object v @@ -545,7 +545,8 @@ def clean_index_list(list obj): # don't force numpy coerce with nan's inferred = infer_dtype(obj) - if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']: + if inferred in ['string', 'bytes', 'unicode', + 'mixed', 'mixed-integer']: return np.asarray(obj, dtype=object), 0 elif inferred in ['integer']: @@ -566,7 +567,7 @@ def clean_index_list(list obj): # is a general, O(max(len(values), len(binner))) method. 
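[Editor's note] Two purely mechanical rewrites recur throughout this series: consolidating scattered `cdef` declarations into a single `cdef:` block, and replacing the deprecated `for i from a <= i < b:` loop syntax with `range`. A toy before/after, with illustrative names that are not from pandas:

    from numpy cimport int64_t, ndarray

    # before
    def total_old(ndarray[int64_t] arr):
        cdef Py_ssize_t i, n = len(arr)
        cdef int64_t total = 0
        for i from 0 <= i < n:
            total += arr[i]
        return total

    # after: one grouped cdef block, range-based loop; same behavior
    def total_new(ndarray[int64_t] arr):
        cdef:
            Py_ssize_t i, n = len(arr)
            int64_t total = 0
        for i in range(n):
            total += arr[i]
        return total
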
@cython.boundscheck(False) @cython.wraparound(False) -def generate_bins_dt64(ndarray[int64_t] values, int64_t[:] binner, +def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, object closed='left', bint hasnans=0): """ Int64 (datetime64) version of generic python version in groupby.py @@ -633,7 +634,7 @@ def row_bool_subset(ndarray[float64_t, ndim=2] values, ndarray[uint8_t, cast=True] mask): cdef: Py_ssize_t i, j, n, k, pos = 0 - float64_t[:, :] out + ndarray[float64_t, ndim=2] out n, k = ( values).shape assert (n == len(mask)) @@ -646,7 +647,7 @@ def row_bool_subset(ndarray[float64_t, ndim=2] values, out[pos, j] = values[i, j] pos += 1 - return np.asarray(out) + return out @cython.boundscheck(False) @@ -655,7 +656,7 @@ def row_bool_subset_object(ndarray[object, ndim=2] values, ndarray[uint8_t, cast=True] mask): cdef: Py_ssize_t i, j, n, k, pos = 0 - object[:, :] out + ndarray[object, ndim=2] out n, k = ( values).shape assert (n == len(mask)) @@ -668,12 +669,13 @@ def row_bool_subset_object(ndarray[object, ndim=2] values, out[pos, j] = values[i, j] pos += 1 - return np.asarray(out) + return out @cython.boundscheck(False) @cython.wraparound(False) -def get_level_sorter(ndarray[int64_t, ndim=1] label, int64_t[:] starts): +def get_level_sorter(ndarray[int64_t, ndim=1] label, + ndarray[int64_t, ndim=1] starts): """ argsort for a single level of a multi-index, keeping the order of higher levels unchanged. `starts` points to starts of same-key indices w.r.t @@ -696,7 +698,7 @@ def get_level_sorter(ndarray[int64_t, ndim=1] label, int64_t[:] starts): @cython.boundscheck(False) @cython.wraparound(False) def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, - int64_t[:] labels, + ndarray[int64_t, ndim=1] labels, Py_ssize_t max_bin, int axis): cdef: @@ -723,7 +725,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, return counts -def generate_slices(int64_t[:] labels, Py_ssize_t ngroups): +def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): cdef: Py_ssize_t i, group_size, n, start int64_t lab @@ -752,7 +754,7 @@ def generate_slices(int64_t[:] labels, Py_ssize_t ngroups): return starts, ends -def indices_fast(object index, int64_t[:] labels, list keys, +def indices_fast(object index, ndarray[int64_t] labels, list keys, list sorted_labels): cdef: Py_ssize_t i, j, k, lab, cur, start, n = len(labels) @@ -824,6 +826,10 @@ cpdef bint is_period(object val): return util.is_period_object(val) +cdef inline bint is_offset(object val): + return getattr(val, '_typ', '_typ') == 'dateoffset' + + _TYPE_MAP = { 'categorical': 'categorical', 'category': 'categorical', @@ -1225,7 +1231,7 @@ def infer_dtype(object value, bint skipna=False): if is_bytes_array(values, skipna=skipna): return 'bytes' - elif util.is_period_object(val): + elif is_period(val): if is_period_array(values): return 'period' @@ -1672,7 +1678,7 @@ cpdef bint is_time_array(ndarray values, bint skipna=False): cdef class PeriodValidator(TemporalValidator): cdef inline bint is_value_typed(self, object value) except -1: - return util.is_period_object(value) + return is_period(value) cdef inline bint is_valid_null(self, object value) except -1: return is_null_period(value) @@ -2062,7 +2068,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return objects -def map_infer_mask(ndarray arr, object f, uint8_t[:] mask, bint convert=1): +def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, + bint convert=1): """ Substitute for np.vectorize with pandas-friendly dtype 
inference @@ -2088,8 +2095,11 @@ def map_infer_mask(ndarray arr, object f, uint8_t[:] mask, bint convert=1): else: val = f(util.get_value_at(arr, i)) - # unbox if 0-dim array GH#690 - val = item_from_zerodim(val) + if util.is_array(val) and PyArray_NDIM(val) == 0: + # unbox 0-dim arrays, GH#690 + # TODO: is there a faster way to unbox? + # item_from_zerodim? + val = val.item() result[i] = val @@ -2125,8 +2135,11 @@ def map_infer(ndarray arr, object f, bint convert=1): for i in range(n): val = f(util.get_value_at(arr, i)) - # unbox if 0-dim array GH#690 - val = item_from_zerodim(val) + if util.is_array(val) and PyArray_NDIM(val) == 0: + # unbox 0-dim arrays, GH#690 + # TODO: is there a faster way to unbox? + # item_from_zerodim? + val = val.item() result[i] = val @@ -2159,7 +2172,7 @@ def to_object_array(list rows, int min_width=0): """ cdef: Py_ssize_t i, j, n, k, tmp - object[:, :] result + ndarray[object, ndim=2] result list row n = len(rows) @@ -2178,13 +2191,13 @@ def to_object_array(list rows, int min_width=0): for j in range(len(row)): result[i, j] = row[j] - return np.asarray(result) + return result -def tuples_to_object_array(object[:] tuples): +def tuples_to_object_array(ndarray[object] tuples): cdef: Py_ssize_t i, j, n, k, tmp - object[:, :] result + ndarray[object, ndim=2] result tuple tup n = len(tuples) @@ -2195,13 +2208,13 @@ def tuples_to_object_array(object[:] tuples): for j in range(k): result[i, j] = tup[j] - return np.asarray(result) + return result def to_object_array_tuples(list rows): cdef: Py_ssize_t i, j, n, k, tmp - object[:, :] result + ndarray[object, ndim=2] result tuple row n = len(rows) @@ -2226,7 +2239,7 @@ def to_object_array_tuples(list rows): for j in range(len(row)): result[i, j] = row[j] - return np.asarray(result) + return result def fast_multiget(dict mapping, ndarray keys, default=np.nan):
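[Editor's note] The map_infer/map_infer_mask hunks above toggle between inline `val.item()` unboxing and the item_from_zerodim helper. For reference, that helper behaves roughly like this sketch (the name with the `_sketch` suffix is illustrative; the real function lives in lib.pyx, as shown earlier in this series):

    import numpy as np

    def item_from_zerodim_sketch(val):
        # unbox 0-dim arrays (GH#690); leave everything else alone
        if isinstance(val, np.ndarray) and val.ndim == 0:
            return val.item()
        return val

    item_from_zerodim_sketch(np.array(1))    # -> 1
    item_from_zerodim_sketch(np.array([1]))  # -> array([1]), unchanged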