Skip to content

Commit 906cd50

Browse files
committed
Re-add kwargs to method signature
1 parent 4ed354a commit 906cd50

File tree

4 files changed

+43
-52
lines changed

4 files changed

+43
-52
lines changed

pandas/_libs/hashtable.pyx

+2-1
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,8 @@ cdef class Int64Factorizer:
124124
uniques.extend(self.uniques.to_array())
125125
self.uniques = uniques
126126
labels = self.table.get_labels(values, self.uniques,
127-
self.count, na_sentinel, na_value)
127+
self.count, na_sentinel,
128+
na_value=na_value)
128129

129130
# sort on
130131
if sort:

pandas/_libs/hashtable_class_helper.pxi.in

+32-38
Original file line number | Diff line number | Diff line change
@@ -355,15 +355,15 @@ cdef class {{name}}HashTable(HashTable):
355355

356356
return np.asarray(locs)
357357

358-
def unique(self, const {{dtype}}_t[:] values, bint return_inverse):
358+
def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False):
359359
"""
360360
Calculate unique values and labels (no sorting!)
361361

362362
Parameters
363363
----------
364364
values : ndarray[{{dtype}}]
365365
Array of values of which unique will be calculated
366-
return_inverse : boolean
366+
return_inverse : boolean, default False
367367
Whether the mapping of the original array values to their location
368368
in the vector of uniques should be returned.
369369

@@ -376,14 +376,12 @@ cdef class {{name}}HashTable(HashTable):
376376
"""
377377
uniques = {{name}}Vector()
378378
# explicitly compile path without inverse for performance
379-
# the last three arguments are not relevant for this method, but we
380-
# don't use kwargs to avoid cython perf hit (just using default values)
381379
if return_inverse:
382-
return self._unique_with_inverse(values, uniques, 0, -1, None)
383-
return self._unique_no_inverse(values, uniques, 0, -1, None)
380+
return self._unique_with_inverse(values, uniques)
381+
return self._unique_no_inverse(values, uniques)
384382

385-
def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel,
386-
object na_value):
383+
def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1,
384+
object na_value=None):
387385
"""
388386
Calculate unique values and labels (no sorting!)
389387

@@ -427,8 +425,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
427425
@cython.boundscheck(False)
428426
@cython.wraparound(False)
429427
def {{func_name}}(self, const {{dtype}}_t[:] values,
430-
{{name}}Vector uniques, Py_ssize_t count_prior,
431-
Py_ssize_t na_sentinel, object na_value):
428+
{{name}}Vector uniques, Py_ssize_t count_prior=0,
429+
Py_ssize_t na_sentinel=-1, object na_value=None):
432430
"""
433431
Calculate unique values and labels (no sorting!)
434432
{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -443,11 +441,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
443441
Array of values of which unique will be calculated
444442
uniques : {{name}}Vector
445443
Vector into which uniques will be written
446-
count_prior : Py_ssize_t
444+
count_prior : Py_ssize_t, default 0
447445
Number of existing entries in uniques
448-
na_sentinel : Py_ssize_t
446+
na_sentinel : Py_ssize_t, default -1
449447
Sentinel value used for all NA-values in inverse
450-
na_value : object
448+
na_value : object, default None
451449
Value to identify as missing. If na_value is None, then
452450
any value "val" satisfying val != val is considered missing.
453451
If na_value is not None, then _additionally_, any value "val"
@@ -727,15 +725,15 @@ cdef class StringHashTable(HashTable):
727725
self.table.vals[k] = i
728726
free(vecs)
729727

730-
def unique(self, ndarray[object] values, bint return_inverse):
728+
def unique(self, ndarray[object] values, bint return_inverse=False):
731729
"""
732730
Calculate unique values and labels (no sorting!)
733731

734732
Parameters
735733
----------
736734
values : ndarray[object]
737735
Array of values of which unique will be calculated
738-
return_inverse : boolean
736+
return_inverse : boolean, default False
739737
Whether the mapping of the original array values to their location
740738
in the vector of uniques should be returned.
741739

@@ -748,14 +746,12 @@ cdef class StringHashTable(HashTable):
748746
"""
749747
uniques = ObjectVector()
750748
# explicitly compile path without inverse for performance
751-
# the last three arguments are not relevant for this method, but we
752-
# don't use kwargs to avoid cython perf hit (just using default values)
753749
if return_inverse:
754-
return self._unique_with_inverse(values, uniques, 0, -1, None)
755-
return self._unique_no_inverse(values, uniques, 0, -1, None)
750+
return self._unique_with_inverse(values, uniques)
751+
return self._unique_no_inverse(values, uniques)
756752

757-
def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
758-
object na_value):
753+
def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
754+
object na_value=None):
759755
"""
760756
Calculate unique values and labels (no sorting!)
761757

@@ -799,8 +795,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
799795
@cython.boundscheck(False)
800796
@cython.wraparound(False)
801797
def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
802-
Py_ssize_t count_prior, Py_ssize_t na_sentinel,
803-
object na_value):
798+
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
799+
object na_value=None):
804800
"""
805801
Calculate unique values and labels (no sorting!)
806802
{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -815,11 +811,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
815811
Array of values of which unique will be calculated
816812
uniques : ObjectVector
817813
Vector into which uniques will be written
818-
count_prior : Py_ssize_t
814+
count_prior : Py_ssize_t, default 0
819815
Number of existing entries in uniques
820-
na_sentinel : Py_ssize_t
816+
na_sentinel : Py_ssize_t, default -1
821817
Sentinel value used for all NA-values in inverse
822-
na_value : object
818+
na_value : object, default None
823819
Value to identify as missing. If na_value is None, then any value
824820
that is not a string is considered missing. If na_value is
825821
not None, then _additionally_ any value "val" satisfying
@@ -1002,15 +998,15 @@ cdef class PyObjectHashTable(HashTable):
1002998

1003999
return np.asarray(locs)
10041000

1005-
def unique(self, ndarray[object] values, bint return_inverse):
1001+
def unique(self, ndarray[object] values, bint return_inverse=False):
10061002
"""
10071003
Calculate unique values and labels (no sorting!)
10081004

10091005
Parameters
10101006
----------
10111007
values : ndarray[object]
10121008
Array of values of which unique will be calculated
1013-
return_inverse : boolean
1009+
return_inverse : boolean, default False
10141010
Whether the mapping of the original array values to their location
10151011
in the vector of uniques should be returned.
10161012

@@ -1023,14 +1019,12 @@ cdef class PyObjectHashTable(HashTable):
10231019
"""
10241020
uniques = ObjectVector()
10251021
# explicitly compile path without inverse for performance
1026-
# the last three arguments are not relevant for this method, but we
1027-
# don't use kwargs to avoid cython perf hit (just using default values)
10281022
if return_inverse:
1029-
return self._unique_with_inverse(values, uniques, 0, -1, None)
1030-
return self._unique_no_inverse(values, uniques, 0, -1, None)
1023+
return self._unique_with_inverse(values, uniques)
1024+
return self._unique_no_inverse(values, uniques)
10311025

1032-
def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
1033-
object na_value):
1026+
def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
1027+
object na_value=None):
10341028
"""
10351029
Calculate unique values and labels (no sorting!)
10361030

@@ -1074,8 +1068,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
10741068
@cython.boundscheck(False)
10751069
@cython.wraparound(False)
10761070
def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
1077-
Py_ssize_t count_prior, Py_ssize_t na_sentinel,
1078-
object na_value):
1071+
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
1072+
object na_value=None):
10791073
"""
10801074
Calculate unique values and labels (no sorting!)
10811075
{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -1090,9 +1084,9 @@ unique_funcs = [('_unique_no_inverse', False, False),
10901084
Array of values of which unique will be calculated
10911085
uniques : ObjectVector
10921086
Vector into which uniques will be written
1093-
count_prior : Py_ssize_t
1087+
count_prior : Py_ssize_t, default 0
10941088
Number of existing entries in uniques
1095-
na_sentinel : Py_ssize_t
1089+
na_sentinel : Py_ssize_t, default -1
10961090
Sentinel value used for all NA-values in inverse
10971091
na_value : object, default None
10981092
Value to identify as missing. If na_value is None, then None _plus_

pandas/core/algorithms.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -365,7 +365,7 @@ def unique(values):
365365
htable, _, values, dtype, ndtype = _get_hashtable_algo(values)
366366

367367
table = htable(len(values))
368-
uniques = table.unique(values, False)
368+
uniques = table.unique(values)
369369
uniques = _reconstruct_data(uniques, dtype, original)
370370

371371
if isinstance(original, ABCSeries) and is_datetime64tz_dtype(dtype):
@@ -470,7 +470,8 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None,
470470
(hash_klass, _), values = _get_data_algo(values, _hashtables)
471471

472472
table = hash_klass(size_hint or len(values))
473-
labels, uniques = table.factorize(values, na_sentinel, na_value)
473+
labels, uniques = table.factorize(values, na_sentinel=na_sentinel,
474+
na_value=na_value)
474475

475476
labels = ensure_platform_int(labels)
476477
return labels, uniques

pandas/tests/test_algos.py

+6-11
Original file line number | Diff line number | Diff line change
@@ -1318,18 +1318,18 @@ def test_vector_resize(self, writable, htable, uniques, dtype,
13181318
uniques = uniques()
13191319

13201320
# get_labels may append to uniques
1321-
htable.get_labels(vals[:nvals], uniques, 0, -1, None)
1321+
htable.get_labels(vals[:nvals], uniques, 0, -1)
13221322
# to_array() sets an external_view_exists flag on uniques.
13231323
tmp = uniques.to_array()
13241324
oldshape = tmp.shape
13251325

13261326
# subsequent get_labels() calls can no longer append to it
13271327
# (except for StringHashTables + ObjectVector)
13281328
if safely_resizes:
1329-
htable.get_labels(vals, uniques, 0, -1, None)
1329+
htable.get_labels(vals, uniques, 0, -1)
13301330
else:
13311331
with tm.assert_raises_regex(ValueError, 'external reference.*'):
1332-
htable.get_labels(vals, uniques, 0, -1, None)
1332+
htable.get_labels(vals, uniques, 0, -1)
13331333

13341334
uniques.to_array() # should not raise here
13351335
assert tmp.shape == oldshape
@@ -1358,14 +1358,12 @@ def test_hashtable_unique(self, htable, tm_dtype, writable):
13581358
# drop_duplicates has own cython code (hash_table_func_helper.pxi)
13591359
# and is tested separately; keeps first occurrence like ht.unique()
13601360
expected_unique = s_duplicated.drop_duplicates(keep='first').values
1361-
return_inverse = False
1362-
result_unique = htable().unique(s_duplicated.values, return_inverse)
1361+
result_unique = htable().unique(s_duplicated.values)
13631362
tm.assert_numpy_array_equal(result_unique, expected_unique)
13641363

13651364
# test with inverse
1366-
return_inverse = True
13671365
result_unique, result_inverse = htable().unique(s_duplicated.values,
1368-
return_inverse)
1366+
return_inverse=True)
13691367
tm.assert_numpy_array_equal(result_unique, expected_unique)
13701368
reconstr = result_unique[result_inverse]
13711369
tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
@@ -1392,10 +1390,7 @@ def test_hashtable_factorize(self, htable, tm_dtype, writable):
13921390
s_duplicated.values.setflags(write=writable)
13931391
na_mask = s_duplicated.isna().values
13941392

1395-
na_sentinel = -1
1396-
na_value = None
1397-
result = htable().factorize(s_duplicated.values, na_sentinel, na_value)
1398-
result_inverse, result_unique = result
1393+
result_inverse, result_unique = htable().factorize(s_duplicated.values)
13991394

14001395
# drop_duplicates has own cython code (hash_table_func_helper.pxi)
14011396
# and is tested separately; keeps first occurrence like ht.factorize()

0 commit comments

Comments
 (0)