Re-add keyword-argument defaults to hashtable method signatures

h-vetinari · h-vetinari · commit 906cd50e8391 · 2018-10-26T19:18:55.000+02:00
diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
@@ -124,7 +124,8 @@ cdef class Int64Factorizer:
             uniques.extend(self.uniques.to_array())
             self.uniques = uniques
         labels = self.table.get_labels(values, self.uniques,
-                                       self.count, na_sentinel, na_value)
+                                       self.count, na_sentinel,
+                                       na_value=na_value)
 
         # sort on
         if sort:
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -355,15 +355,15 @@ cdef class {{name}}HashTable(HashTable):
 
         return np.asarray(locs)
 
-    def unique(self, const {{dtype}}_t[:] values, bint return_inverse):
+    def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False):
         """
         Calculate unique values and labels (no sorting!)
 
         Parameters
         ----------
         values : ndarray[{{dtype}}]
             Array of values of which unique will be calculated
-        return_inverse : boolean
+        return_inverse : boolean, default False
             Whether the mapping of the original array values to their location
             in the vector of uniques should be returned.
 
@@ -376,14 +376,12 @@ cdef class {{name}}HashTable(HashTable):
         """
         uniques = {{name}}Vector()
         # explicitly compile path without inverse for performance
-        # the last three arguments are not relevant for this method, but we
-        # don't use kwargs to avoid cython perf hit (just using default values)
         if return_inverse:
-            return self._unique_with_inverse(values, uniques, 0, -1, None)
-        return self._unique_no_inverse(values, uniques, 0, -1, None)
+            return self._unique_with_inverse(values, uniques)
+        return self._unique_no_inverse(values, uniques)
 
-    def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel,
-                  object na_value):
+    def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1,
+                  object na_value=None):
         """
         Calculate unique values and labels (no sorting!)
 
@@ -427,8 +425,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
     @cython.boundscheck(False)
     @cython.wraparound(False)
     def {{func_name}}(self, const {{dtype}}_t[:] values,
-                      {{name}}Vector uniques, Py_ssize_t count_prior,
-                      Py_ssize_t na_sentinel, object na_value):
+                      {{name}}Vector uniques, Py_ssize_t count_prior=0,
+                      Py_ssize_t na_sentinel=-1, object na_value=None):
         """
         Calculate unique values and labels (no sorting!)
 {{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -443,11 +441,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
             Array of values of which unique will be calculated
         uniques : {{name}}Vector
             Vector into which uniques will be written
-        count_prior : Py_ssize_t
+        count_prior : Py_ssize_t, default 0
             Number of existing entries in uniques
-        na_sentinel : Py_ssize_t
+        na_sentinel : Py_ssize_t, default -1
             Sentinel value used for all NA-values in inverse
-        na_value : object
+        na_value : object, default None
             Value to identify as missing. If na_value is None, then
             any value "val" satisfying val != val is considered missing.
             If na_value is not None, then _additionally_, any value "val"
@@ -727,15 +725,15 @@ cdef class StringHashTable(HashTable):
                 self.table.vals[k] = i
         free(vecs)
 
-    def unique(self, ndarray[object] values, bint return_inverse):
+    def unique(self, ndarray[object] values, bint return_inverse=False):
         """
         Calculate unique values and labels (no sorting!)
 
         Parameters
         ----------
         values : ndarray[object]
             Array of values of which unique will be calculated
-        return_inverse : boolean
+        return_inverse : boolean, default False
             Whether the mapping of the original array values to their location
             in the vector of uniques should be returned.
 
@@ -748,14 +746,12 @@ cdef class StringHashTable(HashTable):
         """
         uniques = ObjectVector()
         # explicitly compile path without inverse for performance
-        # the last three arguments are not relevant for this method, but we
-        # don't use kwargs to avoid cython perf hit (just using default values)
         if return_inverse:
-            return self._unique_with_inverse(values, uniques, 0, -1, None)
-        return self._unique_no_inverse(values, uniques, 0, -1, None)
+            return self._unique_with_inverse(values, uniques)
+        return self._unique_no_inverse(values, uniques)
 
-    def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
-                  object na_value):
+    def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
+                  object na_value=None):
         """
         Calculate unique values and labels (no sorting!)
 
@@ -799,8 +795,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
     @cython.boundscheck(False)
     @cython.wraparound(False)
     def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
-                      Py_ssize_t count_prior, Py_ssize_t na_sentinel,
-                      object na_value):
+                      Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
+                      object na_value=None):
         """
         Calculate unique values and labels (no sorting!)
 {{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -815,11 +811,11 @@ unique_funcs = [('_unique_no_inverse', False, False),
             Array of values of which unique will be calculated
         uniques : ObjectVector
             Vector into which uniques will be written
-        count_prior : Py_ssize_t
+        count_prior : Py_ssize_t, default 0
             Number of existing entries in uniques
-        na_sentinel : Py_ssize_t
+        na_sentinel : Py_ssize_t, default -1
             Sentinel value used for all NA-values in inverse
-        na_value : object
+        na_value : object, default None
             Value to identify as missing. If na_value is None, then any value
             that is not a string is considered missing. If na_value is
             not None, then _additionally_ any value "val" satisfying
@@ -1002,15 +998,15 @@ cdef class PyObjectHashTable(HashTable):
 
         return np.asarray(locs)
 
-    def unique(self, ndarray[object] values, bint return_inverse):
+    def unique(self, ndarray[object] values, bint return_inverse=False):
         """
         Calculate unique values and labels (no sorting!)
 
         Parameters
         ----------
         values : ndarray[object]
             Array of values of which unique will be calculated
-        return_inverse : boolean
+        return_inverse : boolean, default False
             Whether the mapping of the original array values to their location
             in the vector of uniques should be returned.
 
@@ -1023,14 +1019,12 @@ cdef class PyObjectHashTable(HashTable):
         """
         uniques = ObjectVector()
         # explicitly compile path without inverse for performance
-        # the last three arguments are not relevant for this method, but we
-        # don't use kwargs to avoid cython perf hit (just using default values)
         if return_inverse:
-            return self._unique_with_inverse(values, uniques, 0, -1, None)
-        return self._unique_no_inverse(values, uniques, 0, -1, None)
+            return self._unique_with_inverse(values, uniques)
+        return self._unique_no_inverse(values, uniques)
 
-    def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel,
-                  object na_value):
+    def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
+                  object na_value=None):
         """
         Calculate unique values and labels (no sorting!)
 
@@ -1074,8 +1068,8 @@ unique_funcs = [('_unique_no_inverse', False, False),
     @cython.boundscheck(False)
     @cython.wraparound(False)
     def {{func_name}}(self, ndarray[object] values, ObjectVector uniques,
-                      Py_ssize_t count_prior, Py_ssize_t na_sentinel,
-                      object na_value):
+                      Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
+                      object na_value=None):
         """
         Calculate unique values and labels (no sorting!)
 {{if func_name == '_factorize' or func_name == 'get_labels'}}
@@ -1090,9 +1084,9 @@ unique_funcs = [('_unique_no_inverse', False, False),
             Array of values of which unique will be calculated
         uniques : ObjectVector
             Vector into which uniques will be written
-        count_prior : Py_ssize_t
+        count_prior : Py_ssize_t, default 0
             Number of existing entries in uniques
-        na_sentinel : Py_ssize_t
+        na_sentinel : Py_ssize_t, default -1
             Sentinel value used for all NA-values in inverse
-        na_value : object
+        na_value : object, default None
             Value to identify as missing. If na_value is None, then None _plus_
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -365,7 +365,7 @@ def unique(values):
     htable, _, values, dtype, ndtype = _get_hashtable_algo(values)
 
     table = htable(len(values))
-    uniques = table.unique(values, False)
+    uniques = table.unique(values)
     uniques = _reconstruct_data(uniques, dtype, original)
 
     if isinstance(original, ABCSeries) and is_datetime64tz_dtype(dtype):
@@ -470,7 +470,8 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None,
     (hash_klass, _), values = _get_data_algo(values, _hashtables)
 
     table = hash_klass(size_hint or len(values))
-    labels, uniques = table.factorize(values, na_sentinel, na_value)
+    labels, uniques = table.factorize(values, na_sentinel=na_sentinel,
+                                      na_value=na_value)
 
     labels = ensure_platform_int(labels)
     return labels, uniques
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -1318,18 +1318,18 @@ def test_vector_resize(self, writable, htable, uniques, dtype,
         uniques = uniques()
 
         # get_labels may append to uniques
-        htable.get_labels(vals[:nvals], uniques, 0, -1, None)
+        htable.get_labels(vals[:nvals], uniques, 0, -1)
         # to_array() sets an external_view_exists flag on uniques.
         tmp = uniques.to_array()
         oldshape = tmp.shape
 
         # subsequent get_labels() calls can no longer append to it
         # (except for StringHashTables + ObjectVector)
         if safely_resizes:
-            htable.get_labels(vals, uniques, 0, -1, None)
+            htable.get_labels(vals, uniques, 0, -1)
         else:
             with tm.assert_raises_regex(ValueError, 'external reference.*'):
-                htable.get_labels(vals, uniques, 0, -1, None)
+                htable.get_labels(vals, uniques, 0, -1)
 
         uniques.to_array()   # should not raise here
         assert tmp.shape == oldshape
@@ -1358,14 +1358,12 @@ def test_hashtable_unique(self, htable, tm_dtype, writable):
         # drop_duplicates has own cython code (hash_table_func_helper.pxi)
         # and is tested separately; keeps first occurrence like ht.unique()
         expected_unique = s_duplicated.drop_duplicates(keep='first').values
-        return_inverse = False
-        result_unique = htable().unique(s_duplicated.values, return_inverse)
+        result_unique = htable().unique(s_duplicated.values)
         tm.assert_numpy_array_equal(result_unique, expected_unique)
 
         # test with inverse
-        return_inverse = True
         result_unique, result_inverse = htable().unique(s_duplicated.values,
-                                                        return_inverse)
+                                                        return_inverse=True)
         tm.assert_numpy_array_equal(result_unique, expected_unique)
         reconstr = result_unique[result_inverse]
         tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
@@ -1392,10 +1390,7 @@ def test_hashtable_factorize(self, htable, tm_dtype, writable):
         s_duplicated.values.setflags(write=writable)
         na_mask = s_duplicated.isna().values
 
-        na_sentinel = -1
-        na_value = None
-        result = htable().factorize(s_duplicated.values, na_sentinel, na_value)
-        result_inverse, result_unique = result
+        result_inverse, result_unique = htable().factorize(s_duplicated.values)
 
         # drop_duplicates has own cython code (hash_table_func_helper.pxi)
         # and is tested separately; keeps first occurrence like ht.factorize()