Skip to content

[CLN] More Misc Cleanups in _libs #22287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits on
Aug 20, 2018
7 changes: 5 additions & 2 deletions pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
from util cimport numeric
from numpy cimport float64_t, double_t


cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
cdef numeric t
cdef:
numeric t

# cython doesn't allow pointer dereference so use array syntax
t = a[0]
a[0] = b[0]
b[0] = t
return 0


cdef enum TiebreakEnumType:
TIEBREAK_AVERAGE
TIEBREAK_MIN,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ tiebreakers = {
}


cdef inline are_diff(object left, object right):
cdef inline bint are_diff(object left, object right):
try:
return fabs(left - right) > FP_ERR
except TypeError:
Expand Down
59 changes: 33 additions & 26 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
limit=None):
cdef Py_ssize_t i, j, nleft, nright
cdef ndarray[int64_t, ndim=1] indexer
cdef {{c_type}} cur, next
cdef int lim, fill_count = 0
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, next
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
Expand Down Expand Up @@ -135,9 +135,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef Py_ssize_t i, N
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0

N = len(values)

Expand Down Expand Up @@ -171,9 +172,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef Py_ssize_t i, j, N, K
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
int lim, fill_count = 0

K, N = (<object> values).shape

Expand Down Expand Up @@ -233,10 +235,11 @@ D
@cython.wraparound(False)
def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
limit=None):
cdef Py_ssize_t i, j, nleft, nright
cdef ndarray[int64_t, ndim=1] indexer
cdef {{c_type}} cur, prev
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, prev
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
Expand Down Expand Up @@ -299,9 +302,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef Py_ssize_t i, N
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0

N = len(values)

Expand Down Expand Up @@ -335,9 +339,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef Py_ssize_t i, j, N, K
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
int lim, fill_count = 0

K, N = (<object> values).shape

Expand Down Expand Up @@ -428,10 +433,10 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
cdef Py_ssize_t length = index.shape[0]
cdef Py_ssize_t i = 0

cdef ndarray[object] result = np.empty(length, dtype=np.object_)
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
ndarray[object] result = np.empty(length, dtype=np.object_)

from pandas._libs.lib import maybe_convert_objects

Expand Down Expand Up @@ -535,6 +540,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,

cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num


cpdef ensure_platform_int(object arr):
# GH3033, GH1392
# platform int is the size of the int pointer, e.g. np.intp
Expand All @@ -546,6 +552,7 @@ cpdef ensure_platform_int(object arr):
else:
return np.array(arr, dtype=np.intp)


cpdef ensure_object(object arr):
if util.is_array(arr):
if (<ndarray> arr).descr.type_num == NPY_OBJECT:
Expand Down
9 changes: 5 additions & 4 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ cdef double nan = NaN


cdef inline float64_t median_linear(float64_t* a, int n) nogil:
cdef int i, j, na_count = 0
cdef float64_t result
cdef float64_t* tmp
cdef:
int i, j, na_count = 0
float64_t result
float64_t* tmp

if n == 0:
return NaN
Expand Down Expand Up @@ -319,7 +320,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

# If we move to the next group, reset
# the fill_idx and counter
if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
curr_fill_idx = -1
filled_vals = 0

Expand Down
16 changes: 8 additions & 8 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
k = <bytes>key.encode(encoding)
kb = <uint8_t *>k
if len(k) != 16:
raise ValueError(
'key should be a 16-byte string encoded, got {!r} (len {})'.format(
k, len(k)))
raise ValueError("key should be a 16-byte string encoded, "
"got {key} (len {klen})".format(key=k, klen=len(k)))

n = len(arr)

Expand All @@ -70,8 +69,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
data = <bytes>str(val).encode(encoding)

else:
raise TypeError("{} of type {} is not a valid type for hashing, "
"must be string or null".format(val, type(val)))
raise TypeError("{val} of type {typ} is not a valid type "
"for hashing, must be string or null"
.format(val=val, typ=type(val)))

l = len(data)
lens[i] = l
Expand Down Expand Up @@ -134,9 +134,9 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,

cpdef uint64_t siphash(bytes data, bytes key) except? 0:
if len(key) != 16:
raise ValueError(
'key should be a 16-byte bytestring, got {!r} (len {})'.format(
key, len(key)))
raise ValueError("key should be a 16-byte bytestring, "
"got {key} (len {klen})"
.format(key=key, klen=len(key)))
return low_level_siphash(data, len(data), key)


Expand Down
14 changes: 10 additions & 4 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ cdef class {{name}}Vector:

if needs_resize(self.data):
if self.external_view_exists:
raise ValueError("external reference but Vector.resize() needed")
raise ValueError("external reference but "
"Vector.resize() needed")
self.resize()

append_data_{{dtype}}(self.data, x)
Expand Down Expand Up @@ -194,6 +195,7 @@ cdef class StringVector:
for i in range(len(x)):
self.append(x[i])


cdef class ObjectVector:

cdef:
Expand All @@ -215,7 +217,8 @@ cdef class ObjectVector:
cdef inline append(self, object o):
if self.n == self.m:
if self.external_view_exists:
raise ValueError("external reference but Vector.resize() needed")
raise ValueError("external reference but "
"Vector.resize() needed")
self.m = max(self.m * 2, _INIT_VEC_CAP)
self.ao.resize(self.m, refcheck=False)
self.data = <PyObject**> self.ao.data
Expand Down Expand Up @@ -405,8 +408,9 @@ cdef class {{name}}HashTable(HashTable):
if needs_resize(ud):
with gil:
if uniques.external_view_exists:
raise ValueError("external reference to uniques held, "
"but Vector.resize() needed")
raise ValueError("external reference to "
"uniques held, but "
"Vector.resize() needed")
uniques.resize()
append_data_{{dtype}}(ud, val)
labels[i] = count
Expand Down Expand Up @@ -742,8 +746,10 @@ cdef class StringHashTable(HashTable):

return np.asarray(labels)


na_sentinel = object


cdef class PyObjectHashTable(HashTable):

def __init__(self, size_hint=1):
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
{{endfor}}



#----------------------------------------------------------------------
# Mode Computations
#----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'
cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):

if (util.is_array(obj) or
isinstance(obj, list) and len(obj) == cnt or
(isinstance(obj, list) and len(obj) == cnt) or
getattr(obj, 'shape', None) == (cnt,)):
raise ValueError('function does not reduce')

Expand Down
22 changes: 12 additions & 10 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,14 @@ cdef inline object create_datetime_from_ts(
return datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, tz)


cdef inline object create_date_from_ts(
int64_t value, npy_datetimestruct dts,
object tz, object freq):
""" convenience routine to construct a datetime.date from its parts """
return date(dts.year, dts.month, dts.day)


cdef inline object create_time_from_ts(
int64_t value, npy_datetimestruct dts,
object tz, object freq):
Expand Down Expand Up @@ -351,8 +353,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):

if ((fvalues < _NS_LOWER_BOUND).any()
or (fvalues > _NS_UPPER_BOUND).any()):
raise OutOfBoundsDatetime(
"cannot convert input with unit '{0}'".format(unit))
raise OutOfBoundsDatetime("cannot convert input with unit "
"'{unit}'".format(unit=unit))
result = (iresult * m).astype('M8[ns]')
iresult = result.view('i8')
iresult[mask] = iNaT
Expand All @@ -378,8 +380,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
except OverflowError:
if is_raise:
raise OutOfBoundsDatetime(
"cannot convert input {0} with the unit "
"'{1}'".format(val, unit))
"cannot convert input {val} with the unit "
"'{unit}'".format(val=val, unit=unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
Expand All @@ -394,16 +396,16 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
except ValueError:
if is_raise:
raise ValueError(
"non convertible value {0} with the unit "
"'{1}'".format(val, unit))
"non convertible value {val} with the unit "
"'{unit}'".format(val=val, unit=unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
except:
if is_raise:
raise OutOfBoundsDatetime(
"cannot convert input {0} with the unit "
"'{1}'".format(val, unit))
"cannot convert input {val} with the unit "
"'{unit}'".format(val=val, unit=unit))
elif is_ignore:
raise AssertionError
iresult[i] = NPY_NAT
Expand Down Expand Up @@ -696,8 +698,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
if is_coerce:
iresult[i] = NPY_NAT
else:
raise TypeError("{0} is not convertible to datetime"
.format(type(val)))
raise TypeError("{typ} is not convertible to datetime"
.format(typ=type(val)))

if seen_datetime and seen_integer:
# we have mixed datetimes & integers
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ cpdef bint tz_compare(object start, object end)
cpdef object get_timezone(object tz)
cpdef object maybe_get_tz(object tz)

cpdef get_utcoffset(tzinfo, obj)
cdef get_utcoffset(tzinfo, obj)
cdef bint is_fixed_offset(object tz)

cdef object get_dst_info(object tz)
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ cdef inline object tz_cache_key(object tz):
# UTC Offsets


cpdef get_utcoffset(tzinfo, obj):
cdef get_utcoffset(tzinfo, obj):
try:
return tzinfo._utcoffset
except AttributeError:
Expand Down Expand Up @@ -187,7 +187,7 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz):
return new_trans


cpdef int64_t[:] unbox_utcoffsets(object transinfo):
cdef int64_t[:] unbox_utcoffsets(object transinfo):
cdef:
Py_ssize_t i, sz
int64_t[:] arr
Expand Down
Loading