@@ -355,15 +355,15 @@ cdef class {{name}}HashTable(HashTable):
355
355
356
356
return np.asarray(locs)
357
357
358
- def unique(self, const {{dtype}}_t[:] values, bint return_inverse):
358
+ def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False ):
359
359
"""
360
360
Calculate unique values and labels (no sorting!)
361
361
362
362
Parameters
363
363
----------
364
364
values : ndarray[{{dtype}}]
365
365
Array of values of which unique will be calculated
366
- return_inverse : boolean
366
+ return_inverse : boolean, default False
367
367
Whether the mapping of the original array values to their location
368
368
in the vector of uniques should be returned.
369
369
@@ -376,14 +376,12 @@ cdef class {{name}}HashTable(HashTable):
376
376
"""
377
377
uniques = {{name}}Vector()
378
378
# explicitly compile path without inverse for performance
379
- # the last three arguments are not relevant for this method, but we
380
- # don't use kwargs to avoid cython perf hit (just using default values)
381
379
if return_inverse:
382
- return self._unique_with_inverse(values, uniques, 0, -1, None )
383
- return self._unique_no_inverse(values, uniques, 0, -1, None )
380
+ return self._unique_with_inverse(values, uniques)
381
+ return self._unique_no_inverse(values, uniques)
384
382
385
- def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel,
386
- object na_value):
383
+ def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1 ,
384
+ object na_value=None ):
387
385
"""
388
386
Calculate unique values and labels (no sorting!)
389
387
@@ -427,8 +425,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
427
425
@cython.boundscheck(False)
428
426
@cython.wraparound(False)
429
427
def {{func_name}}(self, const {{dtype}}_t[:] values,
430
- {{name}}Vector uniques, Py_ssize_t count_prior,
431
- Py_ssize_t na_sentinel, object na_value):
428
+ {{name}}Vector uniques, Py_ssize_t count_prior=0 ,
429
+ Py_ssize_t na_sentinel=-1 , object na_value=None ):
432
430
"""
433
431
Calculate unique values and labels (no sorting!)
434
432
{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -443,11 +441,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
443
441
Array of values of which unique will be calculated
444
442
uniques : {{name}}Vector
445
443
Vector into which uniques will be written
446
- count_prior : Py_ssize_t
444
+ count_prior : Py_ssize_t, default 0
447
445
Number of existing entries in uniques
448
- na_sentinel : Py_ssize_t
446
+ na_sentinel : Py_ssize_t, default -1
449
447
Sentinel value used for all NA-values in inverse
450
- na_value : object
448
+ na_value : object, default None
451
449
Value to identify as missing. If na_value is None, then
452
450
any value "val" satisfying val != val is considered missing.
453
451
If na_value is not None, then _additionally_, any value "val"
@@ -727,15 +725,15 @@ cdef class StringHashTable(HashTable):
727
725
self.table.vals[k] = i
728
726
free(vecs)
729
727
730
- def unique(self, ndarray[object] values, bint return_inverse):
728
+ def unique(self, ndarray[object] values, bint return_inverse=False ):
731
729
"""
732
730
Calculate unique values and labels (no sorting!)
733
731
734
732
Parameters
735
733
----------
736
734
values : ndarray[object]
737
735
Array of values of which unique will be calculated
738
- return_inverse : boolean
736
+ return_inverse : boolean, default False
739
737
Whether the mapping of the original array values to their location
740
738
in the vector of uniques should be returned.
741
739
@@ -748,14 +746,12 @@ cdef class StringHashTable(HashTable):
748
746
"""
749
747
uniques = ObjectVector()
750
748
# explicitly compile path without inverse for performance
751
- # the last three arguments are not relevant for this method, but we
752
- # don't use kwargs to avoid cython perf hit (just using default values)
753
749
if return_inverse:
754
- return self._unique_with_inverse(values, uniques, 0, -1, None )
755
- return self._unique_no_inverse(values, uniques, 0, -1, None )
750
+ return self._unique_with_inverse(values, uniques)
751
+ return self._unique_no_inverse(values, uniques)
756
752
757
- def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
758
- object na_value):
753
+ def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1 ,
754
+ object na_value=None ):
759
755
"""
760
756
Calculate unique values and labels (no sorting!)
761
757
@@ -799,8 +795,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
799
795
@cython.boundscheck(False)
800
796
@cython.wraparound(False)
801
797
def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
802
- Py_ssize_t count_prior, Py_ssize_t na_sentinel,
803
- object na_value):
798
+ Py_ssize_t count_prior=0 , Py_ssize_t na_sentinel=-1 ,
799
+ object na_value=None ):
804
800
"""
805
801
Calculate unique values and labels (no sorting!)
806
802
{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -815,11 +811,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
815
811
Array of values of which unique will be calculated
816
812
uniques : ObjectVector
817
813
Vector into which uniques will be written
818
- count_prior : Py_ssize_t
814
+ count_prior : Py_ssize_t, default 0
819
815
Number of existing entries in uniques
820
- na_sentinel : Py_ssize_t
816
+ na_sentinel : Py_ssize_t, default -1
821
817
Sentinel value used for all NA-values in inverse
822
- na_value : object
818
+ na_value : object, default None
823
819
Value to identify as missing. If na_value is None, then any value
824
820
that is not a string is considered missing. If na_value is
825
821
not None, then _additionally_ any value "val" satisfying
@@ -1002,15 +998,15 @@ cdef class PyObjectHashTable(HashTable):
1002
998
1003
999
return np.asarray(locs)
1004
1000
1005
- def unique(self, ndarray[object] values, bint return_inverse):
1001
+ def unique(self, ndarray[object] values, bint return_inverse=False ):
1006
1002
"""
1007
1003
Calculate unique values and labels (no sorting!)
1008
1004
1009
1005
Parameters
1010
1006
----------
1011
1007
values : ndarray[object]
1012
1008
Array of values of which unique will be calculated
1013
- return_inverse : boolean
1009
+ return_inverse : boolean, default False
1014
1010
Whether the mapping of the original array values to their location
1015
1011
in the vector of uniques should be returned.
1016
1012
@@ -1023,14 +1019,12 @@ cdef class PyObjectHashTable(HashTable):
1023
1019
"""
1024
1020
uniques = ObjectVector()
1025
1021
# explicitly compile path without inverse for performance
1026
- # the last three arguments are not relevant for this method, but we
1027
- # don't use kwargs to avoid cython perf hit (just using default values)
1028
1022
if return_inverse:
1029
- return self._unique_with_inverse(values, uniques, 0, -1, None )
1030
- return self._unique_no_inverse(values, uniques, 0, -1, None )
1023
+ return self._unique_with_inverse(values, uniques)
1024
+ return self._unique_no_inverse(values, uniques)
1031
1025
1032
- def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
1033
- object na_value):
1026
+ def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1 ,
1027
+ object na_value=None ):
1034
1028
"""
1035
1029
Calculate unique values and labels (no sorting!)
1036
1030
@@ -1074,8 +1068,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
1074
1068
@cython.boundscheck(False)
1075
1069
@cython.wraparound(False)
1076
1070
def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
1077
- Py_ssize_t count_prior, Py_ssize_t na_sentinel,
1078
- object na_value):
1071
+ Py_ssize_t count_prior=0 , Py_ssize_t na_sentinel=-1 ,
1072
+ object na_value=None ):
1079
1073
"""
1080
1074
Calculate unique values and labels (no sorting!)
1081
1075
{{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -1090,9 +1084,9 @@ unique_funcs = [('_unique_no_inverse', False, False),
1090
1084
Array of values of which unique will be calculated
1091
1085
uniques : ObjectVector
1092
1086
Vector into which uniques will be written
1093
- count_prior : Py_ssize_t
1087
+ count_prior : Py_ssize_t, default 0
1094
1088
Number of existing entries in uniques
1095
- na_sentinel : Py_ssize_t
1089
+ na_sentinel : Py_ssize_t, default -1
1096
1090
Sentinel value used for all NA-values in inverse
1097
1091
na_value : object
1098
1092
Value to identify as missing. If na_value is None, then None _plus_
0 commit comments