From 0410aed279933f8ea68033b82071542b76c8d402 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 10 Aug 2018 21:32:26 -0700 Subject: [PATCH 1/7] remove cython:profile annotations --- pandas/_libs/algos.pyx | 2 +- pandas/_libs/groupby.pyx | 1 - pandas/_libs/hashing.pyx | 2 +- pandas/_libs/hashtable.pyx | 2 +- pandas/_libs/index.pyx | 2 +- pandas/_libs/indexing.pyx | 2 +- pandas/_libs/join.pyx | 2 +- pandas/_libs/khash.pxd | 1 - pandas/_libs/missing.pxd | 1 - pandas/_libs/missing.pyx | 1 - pandas/_libs/reduction.pyx | 1 - pandas/_libs/reshape.pyx | 2 +- pandas/_libs/skiplist.pxd | 1 - pandas/_libs/tslib.pyx | 1 - pandas/_libs/tslibs/ccalendar.pxd | 1 - pandas/_libs/tslibs/ccalendar.pyx | 1 - pandas/_libs/tslibs/conversion.pxd | 1 - pandas/_libs/tslibs/conversion.pyx | 1 - pandas/_libs/tslibs/frequencies.pxd | 1 - pandas/_libs/tslibs/nattype.pxd | 1 - pandas/_libs/tslibs/nattype.pyx | 1 - pandas/_libs/tslibs/np_datetime.pxd | 1 - pandas/_libs/tslibs/np_datetime.pyx | 1 - pandas/_libs/tslibs/timedeltas.pxd | 1 - pandas/_libs/tslibs/timedeltas.pyx | 1 - pandas/_libs/tslibs/timestamps.pxd | 1 - pandas/_libs/tslibs/timestamps.pyx | 1 - pandas/_libs/tslibs/timezones.pxd | 1 - pandas/_libs/tslibs/timezones.pyx | 1 - pandas/_libs/window.pyx | 2 +- 30 files changed, 8 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 124792638e3df..908bf59987527 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- cimport cython from cython cimport Py_ssize_t diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 5681d01c6bb25..077ef925a8321 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False cimport cython from cython cimport Py_ssize_t diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index a9775d3950187..65fdeb8e33efd 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- # Translated from the reference implementation # at https://github.com/veorq/SipHash diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index b9a72a0c8285f..2ced98198afc6 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- cimport cython diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 5918560cf1436..4965469d05276 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- from datetime import datetime, timedelta, date cimport cython diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx index c680706b7b2d2..8d7a440af8fea 100644 --- a/pandas/_libs/indexing.pyx +++ b/pandas/_libs/indexing.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- cdef class _NDFrameIndexerBase: """ diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 27d2a639d13e6..ebb7bd40694ec 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- cimport cython from cython cimport Py_ssize_t diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd index 4c00e273b33b7..971a45e365586 100644 --- a/pandas/_libs/khash.pxd +++ b/pandas/_libs/khash.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cpython cimport PyObject from numpy cimport int64_t, uint64_t, int32_t, uint32_t, float64_t diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index b90975df8e247..2c1f13eeb5dff 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from tslibs.nattype cimport is_null_datetimelike diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index e9c3cf12eb328..c787cc61e8773 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cpython cimport PyFloat_Check, PyComplex_Check diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 3588ac14c87d1..2ccb58dd67014 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from distutils.version import LooseVersion from cython cimport Py_ssize_t diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index 4fd1fd0f37b1d..8d7e314517ed8 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- cimport cython from cython cimport Py_ssize_t diff --git a/pandas/_libs/skiplist.pxd b/pandas/_libs/skiplist.pxd index 78f206962bcfc..a273d2c445d18 100644 --- a/pandas/_libs/skiplist.pxd +++ b/pandas/_libs/skiplist.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cython cimport Py_ssize_t diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 04e039a9fc2c9..7b938d0279a7c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cython cimport Py_ssize_t from cpython cimport PyFloat_Check, PyUnicode_Check diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 04fb6eaf49c84..08f539a70a7ed 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cython cimport Py_ssize_t diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 12d35f7ce2f58..ec54c023290b3 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False # cython: boundscheck=False """ Cython implementations of functions resembling the stdlib calendar module diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 96e4676fe91c0..4eb93c35b4afc 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cpython.datetime cimport datetime, tzinfo diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 74a9823a85016..fe664cf03b0b9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False cimport cython from cython cimport Py_ssize_t diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd index 98d600c540ace..4e7949e55c836 100644 --- a/pandas/_libs/tslibs/frequencies.pxd +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False cpdef object get_rule_month(object source, object default=*) diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index 24ce797575b2a..382ac9d323918 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from numpy cimport int64_t cdef int64_t NPY_NAT diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 25b1572cfe52f..08d9128ff660c 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cpython cimport ( PyFloat_Check, PyComplex_Check, diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index c3d229d4e5006..803c8cb18e3d5 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cpython.datetime cimport date, datetime diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index a0099837e876a..f0aa6389fba56 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cpython cimport (Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE, PyUnicode_Check, PyUnicode_AsASCIIString) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index ef9fd3207e5f0..eda4418902513 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from numpy cimport int64_t diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c32ad2f4d599c..b84c1a753215a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False import collections import textwrap import warnings diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index e9e484c715f9a..d6b649becc479 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from numpy cimport int64_t from np_datetime cimport npy_datetimestruct diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 67420fda8aa51..3ab1396c0fe38 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False import warnings from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 67353f3eec614..e8a10a0728212 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False cdef bint is_utc(object tz) cdef bint is_tzlocal(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index a787452d90c07..4d87a37866c49 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False from cython cimport Py_ssize_t diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index efc8a02014bc0..c43750c754209 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1,4 +1,4 @@ -# cython: profile=False +# -*- coding: utf-8 -*- # cython: boundscheck=False, wraparound=False, cdivision=True cimport cython From 7f77137391e61fb0e1d859ceb9db8f76d5afb6e0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 10 Aug 2018 21:40:20 -0700 Subject: [PATCH 2/7] uncdef functions not used from within cython --- pandas/_libs/lib.pyx | 39 +++++++++++++++++++-------------------- pandas/_libs/writers.pyx | 11 +++++------ 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e05905ab63624..efda595e89a2f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -107,7 +107,7 @@ def memory_usage_of_objects(object[:] arr): # ---------------------------------------------------------------------- -cpdef bint is_scalar(object val): +def is_scalar(val: object) -> bint: """ Return True if given value is scalar. @@ -137,7 +137,7 @@ cpdef bint is_scalar(object val): or util.is_period_object(val) or is_decimal(val) or is_interval(val) - or is_offset(val)) + or util.is_offset_object(val)) def item_from_zerodim(object val): @@ -455,9 +455,10 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): return slice(start, end) +# TODO: belongs in libmissing? @cython.wraparound(False) @cython.boundscheck(False) -cpdef bint array_equivalent_object(object[:] left, object[:] right): +def array_equivalent_object(left: object[:], right: object[:]) -> bint: """ perform an element by element comparion on 1-d object arrays taking into account nan positions """ cdef: @@ -499,7 +500,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): return result -cpdef ndarray[object] astype_unicode(ndarray arr): +def astype_unicode(arr: ndarray) -> ndarray[object]: cdef: Py_ssize_t i, n = arr.size ndarray[object] result = np.empty(n, dtype=object) @@ -512,7 +513,7 @@ cpdef ndarray[object] astype_unicode(ndarray arr): return result -cpdef ndarray[object] astype_str(ndarray arr): +def astype_str(arr: ndarray) -> ndarray[object]: cdef: Py_ssize_t i, n = arr.size ndarray[object] result = np.empty(n, dtype=object) @@ -797,19 +798,19 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, # core.common import for fast inference checks -cpdef bint is_float(object obj): +def is_float(obj: object) -> bint: return util.is_float_object(obj) -cpdef bint is_integer(object obj): +def is_integer(obj: object) -> bint: return util.is_integer_object(obj) -cpdef bint is_bool(object obj): +def is_bool(obj: object) -> bint: return util.is_bool_object(obj) -cpdef bint is_complex(object obj): +def is_complex(obj: object) -> bint: return util.is_complex_object(obj) @@ -821,15 +822,11 @@ cpdef bint is_interval(object obj): return getattr(obj, '_typ', '_typ') == 'interval' -cpdef bint is_period(object val): +def is_period(val: object) -> bint: """ Return a boolean if this is a Period object """ return util.is_period_object(val) -cdef inline bint is_offset(object val): - return getattr(val, '_typ', '_typ') == 'dateoffset' - - _TYPE_MAP = { 'categorical': 'categorical', 'category': 'categorical', @@ -1231,7 +1228,7 @@ def infer_dtype(object value, bint skipna=False): if is_bytes_array(values, skipna=skipna): return 'bytes' - elif is_period(val): + elif util.is_period_object(val): if is_period_array(values): return 'period' @@ -1249,7 +1246,7 @@ def infer_dtype(object value, bint skipna=False): return 'mixed' -cpdef object infer_datetimelike_array(object arr): +def infer_datetimelike_array(arr: object) -> object: """ infer if we have a datetime or timedelta array - date: we have *only* date and maybe strings, nulls @@ -1586,7 +1583,7 @@ cpdef bint is_datetime64_array(ndarray values): return validator.validate(values) -cpdef bint is_datetime_with_singletz_array(ndarray values): +def is_datetime_with_singletz_array(values: ndarray) -> bint: """ Check values have the same tzinfo attribute. Doesn't check values are datetime-like types. @@ -1622,7 +1619,8 @@ cdef class TimedeltaValidator(TemporalValidator): return is_null_timedelta64(value) -cpdef bint is_timedelta_array(ndarray values): +# TODO: Not used outside of tests; remove? +def is_timedelta_array(values: ndarray) -> bint: cdef: TimedeltaValidator validator = TimedeltaValidator(len(values), skipna=True) @@ -1634,7 +1632,8 @@ cdef class Timedelta64Validator(TimedeltaValidator): return util.is_timedelta64_object(value) -cpdef bint is_timedelta64_array(ndarray values): +# TODO: Not used outside of tests; remove? +def is_timedelta64_array(values: ndarray) -> bint: cdef: Timedelta64Validator validator = Timedelta64Validator(len(values), skipna=True) @@ -1678,7 +1677,7 @@ cpdef bint is_time_array(ndarray values, bint skipna=False): cdef class PeriodValidator(TemporalValidator): cdef inline bint is_value_typed(self, object value) except -1: - return is_period(value) + return util.is_period_object(value) cdef inline bint is_valid_null(self, object value) except -1: return is_null_period(value) diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 796f4b754857e..ceee7eb47c775 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -3,8 +3,7 @@ cimport cython from cython cimport Py_ssize_t -from cpython cimport (PyString_Check, PyBytes_Check, PyUnicode_Check, - PyBytes_GET_SIZE, PyUnicode_GET_SIZE) +from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE try: from cpython cimport PyString_GET_SIZE @@ -124,7 +123,7 @@ def convert_json_to_lines(object arr): # stata, pytables @cython.boundscheck(False) @cython.wraparound(False) -cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): +def max_len_string_array(arr: pandas_string[:]) -> Py_ssize_t: """ return the maximum size of elements in a 1-dim string array """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] @@ -132,11 +131,11 @@ cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): for i in range(length): v = arr[i] - if PyString_Check(v): + if isinstance(v, str): l = PyString_GET_SIZE(v) - elif PyBytes_Check(v): + elif isinstance(v, bytes): l = PyBytes_GET_SIZE(v) - elif PyUnicode_Check(v): + elif isinstance(v, unicode): l = PyUnicode_GET_SIZE(v) if l > m: From b999b53d42a7bd485520a9c48f6610500aab9052 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 10 Aug 2018 21:50:49 -0700 Subject: [PATCH 3/7] un-cpdef functions that dont need to be --- pandas/_libs/algos_common_helper.pxi.in | 13 +++++++------ pandas/_libs/groupby.pyx | 1 + pandas/_libs/hashing.pyx | 1 + pandas/_libs/index.pyx | 2 +- pandas/_libs/internals.pyx | 4 ++-- pandas/_libs/interval.pyx | 4 ++-- pandas/_libs/lib.pyx | 1 - pandas/_libs/tslib.pyx | 2 +- 8 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 97b7196da80bb..ca0fb66d2a1e1 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -45,7 +45,7 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): +def map_indices_{{name}}(ndarray[{{c_type}}] index): """ Produce a dict mapping the values of the input array to their respective locations. @@ -55,8 +55,9 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): Better to do this with Cython because of the enormous speed boost. """ - cdef Py_ssize_t i, length - cdef dict result = {} + cdef: + Py_ssize_t i, length + dict result = {} length = len(index) @@ -535,7 +536,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, cdef int PLATFORM_INT = ( np.arange(0, dtype=np.intp)).descr.type_num -cpdef ensure_platform_int(object arr): +def ensure_platform_int(object arr): # GH3033, GH1392 # platform int is the size of the int pointer, e.g. np.intp if util.is_array(arr): @@ -546,7 +547,7 @@ cpdef ensure_platform_int(object arr): else: return np.array(arr, dtype=np.intp) -cpdef ensure_object(object arr): +def ensure_object(object arr): if util.is_array(arr): if ( arr).descr.type_num == NPY_OBJECT: return arr @@ -579,7 +580,7 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} -cpdef ensure_{{name}}(object arr, copy=True): +def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): if ( arr).descr.type_num == NPY_{{c_type}}: return arr diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 077ef925a8321..b13b1bf877c9d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -66,6 +66,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result +# TODO: Is this redundant with algos.kth_smallest? cdef inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n) nogil: diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 65fdeb8e33efd..e5efc5fb82e20 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -132,6 +132,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, v2[0] = _rotl(v2[0], 32) +# TODO: This appears unused; remove? cpdef uint64_t siphash(bytes data, bytes key) except? 0: if len(key) != 16: raise ValueError( diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 4965469d05276..92c14cb8880f0 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -49,7 +49,7 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None): return util.get_value_at(arr, loc) -cpdef object get_value_box(ndarray arr, object loc): +def get_value_box(arr: ndarray, loc: object) -> object: cdef: Py_ssize_t i, sz diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 97cc7f96cb24f..996570dae3302 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -184,7 +184,7 @@ cdef class BlockPlacement: return self._as_slice -cpdef slice_canonize(slice s): +cdef slice_canonize(slice s): """ Convert slice to canonical bounded form. """ @@ -255,7 +255,7 @@ cpdef Py_ssize_t slice_len( return length -cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): +cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): """ Get (start, stop, step, length) tuple for a slice. diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 22153b58cc49b..96a24fc4f3ee7 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -361,8 +361,8 @@ cdef class Interval(IntervalMixin): @cython.wraparound(False) @cython.boundscheck(False) -cpdef intervals_to_interval_bounds(ndarray intervals, - bint validate_closed=True): +def intervals_to_interval_bounds(ndarray intervals, + bint validate_closed=True): """ Parameters ---------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index efda595e89a2f..14c0895f5143f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -455,7 +455,6 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): return slice(start, end) -# TODO: belongs in libmissing? @cython.wraparound(False) @cython.boundscheck(False) def array_equivalent_object(left: object[:], right: object[:]) -> bint: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7b938d0279a7c..76989337dce0c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -299,7 +299,7 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): +def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): """ convert the ndarray according to the unit if errors: From 38cd31f9835f0b7c2e7cb1e3677648172b6d4b1a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 11 Aug 2018 09:13:50 -0700 Subject: [PATCH 4/7] change fickle syntax --- pandas/_libs/writers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index ceee7eb47c775..8e55ffad8d231 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -123,7 +123,7 @@ def convert_json_to_lines(object arr): # stata, pytables @cython.boundscheck(False) @cython.wraparound(False) -def max_len_string_array(arr: pandas_string[:]) -> Py_ssize_t: +def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: """ return the maximum size of elements in a 1-dim string array """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] From 74df6b9d2094ec4a7739ce8339b70d9693e84554 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 11 Aug 2018 09:25:16 -0700 Subject: [PATCH 5/7] modernize string formatting --- pandas/_libs/parsers.pyx | 41 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index fba7f210b34a1..c63860418590d 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -29,7 +29,7 @@ cdef extern from "Python.h": import numpy as np cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint64_t, int64_t +from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t cnp.import_array() from util cimport UINT64_MAX, INT64_MAX, INT64_MIN @@ -694,7 +694,7 @@ cdef class TextReader: if ptr == NULL: if not os.path.exists(source): raise compat.FileNotFoundError( - 'File %s does not exist' % source) + 'File {source} does not exist'.format(source=source)) raise IOError('Initializing from file failed') self.parser.source = ptr @@ -772,9 +772,10 @@ cdef class TextReader: if name == '': if self.has_mi_columns: - name = 'Unnamed: %d_level_%d' % (i, level) + name = ('Unnamed: {i}_level_{lvl}' + .format(i=i, lvl=level)) else: - name = 'Unnamed: %d' % i + name = 'Unnamed: {i}'.format(i=i) unnamed_count += 1 count = counts.get(name, 0) @@ -849,8 +850,8 @@ cdef class TextReader: # 'data has %d fields' # % (passed_count, field_count)) - if self.has_usecols and self.allow_leading_cols and \ - not callable(self.usecols): + if s(elf.has_usecols and self.allow_leading_cols and + not callable(self.usecols)): nuse = len(self.usecols) if nuse == passed_count: self.leading_cols = 0 @@ -1027,8 +1028,10 @@ cdef class TextReader: if self.table_width - self.leading_cols > num_cols: raise ParserError( - "Too many columns specified: expected %s and found %s" % - (self.table_width - self.leading_cols, num_cols)) + "Too many columns specified: expected {expected} and " + "found {found}" + .format(expected=self.table_width - self.leading_cols, + found=num_cols)) results = {} nused = 0 @@ -1036,8 +1039,8 @@ cdef class TextReader: if i < self.leading_cols: # Pass through leading columns always name = i - elif self.usecols and not callable(self.usecols) and \ - nused == len(self.usecols): + elif (self.usecols and not callable(self.usecols) and + nused == len(self.usecols)): # Once we've gathered all requested columns, stop. GH5766 break else: @@ -1103,7 +1106,7 @@ cdef class TextReader: col_res = _maybe_upcast(col_res) if col_res is None: - raise ParserError('Unable to parse column %d' % i) + raise ParserError('Unable to parse column {i}'.format(i=i)) results[i] = col_res @@ -1222,8 +1225,8 @@ cdef class TextReader: elif dtype.kind == 'U': width = dtype.itemsize if width > 0: - raise TypeError("the dtype %s is not " - "supported for parsing" % dtype) + raise TypeError("the dtype {dtype} is not " + "supported for parsing".format(dtype=dtype)) # unicode variable width return self._string_convert(i, start, end, na_filter, @@ -1241,12 +1244,12 @@ cdef class TextReader: return self._string_convert(i, start, end, na_filter, na_hashset) elif is_datetime64_dtype(dtype): - raise TypeError("the dtype %s is not supported " + raise TypeError("the dtype {dtype} is not supported " "for parsing, pass this column " - "using parse_dates instead" % dtype) + "using parse_dates instead".format(dtype=dtype)) else: - raise TypeError("the dtype %s is not " - "supported for parsing" % dtype) + raise TypeError("the dtype {dtype} is not " + "supported for parsing".format(dtype=dtype)) cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end, bint na_filter, kh_str_t *na_hashset): @@ -2058,7 +2061,7 @@ cdef kh_float64_t* kset_float64_from_list(values) except NULL: khiter_t k kh_float64_t *table int ret = 0 - cnp.float64_t val + float64_t val object value table = kh_init_float64() @@ -2101,7 +2104,7 @@ cdef raise_parser_error(object base, parser_t *parser): Py_XDECREF(type) raise old_exc - message = '%s. C error: ' % base + message = '{base}. C error: '.format(base=base) if parser.error_msg != NULL: if PY3: message += parser.error_msg.decode('utf-8') From e0a19adcff68a1398f477dc9060e20409c9fe856 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 11 Aug 2018 11:35:15 -0700 Subject: [PATCH 6/7] typo fixup --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index c63860418590d..91faed678192f 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -850,7 +850,7 @@ cdef class TextReader: # 'data has %d fields' # % (passed_count, field_count)) - if s(elf.has_usecols and self.allow_leading_cols and + if (self.has_usecols and self.allow_leading_cols and not callable(self.usecols)): nuse = len(self.usecols) if nuse == passed_count: From 5dfb6480b98c38221660a4d78a8a5a429f6c4870 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 11 Aug 2018 14:13:47 -0700 Subject: [PATCH 7/7] flake8 fixup --- pandas/_libs/interval.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 96a24fc4f3ee7..e018ede7e3150 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -414,4 +414,5 @@ def intervals_to_interval_bounds(ndarray intervals, return left, right, closed + include "intervaltree.pxi"