diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index bb82d5578481b..56f59851d6d04 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -35,10 +35,6 @@ def verify_pickle(self, indices): assert indices.equals(unpickled) def test_pickle_compat_construction(self): - # this is testing for pickle compat - if self._holder is None: - return - # need an object to create with pytest.raises(TypeError, self._holder) @@ -236,7 +232,7 @@ def test_set_name_methods(self, indices): # don't tests a MultiIndex here (as its tested separated) if isinstance(indices, MultiIndex): - return + pytest.skip('Skip check for MultiIndex') original_name = indices.name new_ind = indices.set_names([new_name]) assert new_ind.name == new_name @@ -333,7 +329,8 @@ def test_copy_and_deepcopy(self, indices): from copy import copy, deepcopy if isinstance(indices, MultiIndex): - return + pytest.skip('Skip check for MultiIndex') + for func in (copy, deepcopy): idx_copy = func(indices) assert idx_copy is not indices @@ -342,20 +339,50 @@ def test_copy_and_deepcopy(self, indices): new_copy = indices.copy(deep=True, name="banana") assert new_copy.name == "banana" - def test_duplicates(self, indices): + def test_has_duplicates(self, indices): if type(indices) is not self._holder: - return + pytest.skip('Can only check if we have the correct type') if not len(indices) or isinstance(indices, MultiIndex): - return + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates + pytest.skip('Skip check for empty Index and MultiIndex') + idx = self._holder([indices[0]] * 5) assert not idx.is_unique assert idx.has_duplicates + @pytest.mark.parametrize('keep', ['first', 'last', False]) + def test_duplicated(self, indices, keep): + if type(indices) is not self._holder: + pytest.skip('Can only check if we know the index type') + if not len(indices) or isinstance(indices, MultiIndex): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates + pytest.skip('Skip check for empty Index and MultiIndex') + + idx = self._holder(indices) + if idx.has_duplicates: + # We are testing the duplicated-method here, so we need to know + # exactly which indices are duplicate and how (for the result). + # This is not possible if "idx" has duplicates already, which we + # therefore remove. This is seemingly circular, as drop_duplicates + # invokes duplicated, but in the end, it all works out because we + # cross-check with Series.duplicated, which is tested separately. + idx = idx.drop_duplicates() + + n, k = len(idx), 10 + duplicated_selection = np.random.choice(n, k * n) + expected = pd.Series(duplicated_selection).duplicated(keep=keep).values + idx = self._holder(idx.values[duplicated_selection]) + + result = idx.duplicated(keep=keep) + tm.assert_numpy_array_equal(result, expected) + def test_unique(self, indices): # don't test a MultiIndex here (as its tested separated) # don't test a CategoricalIndex because categories change (GH 18291) if isinstance(indices, (MultiIndex, CategoricalIndex)): - return + pytest.skip('Skip check for MultiIndex/CategoricalIndex') # GH 17896 expected = indices.drop_duplicates() @@ -375,7 +402,7 @@ def test_unique_na(self): def test_get_unique_index(self, indices): # MultiIndex tested separately if not len(indices) or isinstance(indices, MultiIndex): - return + pytest.skip('Skip check for empty Index and MultiIndex') idx = indices[[0] * 5] idx_unique = indices[[0]] @@ -394,7 +421,7 @@ def test_get_unique_index(self, indices): # nans: if not indices._can_hold_na: - return + pytest.skip('Skip na-check if index cannot hold na') if needs_i8_conversion(indices): vals = indices.asi8[[0] * 5] @@ -423,7 +450,7 @@ def test_sort(self, indices): def test_mutability(self, indices): if not len(indices): - return + pytest.skip('Skip check for empty Index') pytest.raises(TypeError, indices.__setitem__, 0, indices[0]) def test_view(self, indices): @@ -761,7 +788,7 @@ def test_equals_op(self): # GH9947, GH10637 index_a = self.create_index() if isinstance(index_a, PeriodIndex): - return + pytest.skip('Skip check for PeriodIndex') n = len(index_a) index_b = index_a[0:-1] @@ -989,11 +1016,11 @@ def test_searchsorted_monotonic(self, indices): # not implemented for tuple searches in MultiIndex # or Intervals searches in IntervalIndex if isinstance(indices, (MultiIndex, IntervalIndex)): - return + pytest.skip('Skip check for MultiIndex/IntervalIndex') # nothing to test if the index is empty if indices.empty: - return + pytest.skip('Skip check for empty Index') value = indices[0] # determine the expected results (handle dupes for 'right') diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index a2a4170256088..2221fd023b561 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -590,12 +590,15 @@ def test_is_unique(self, values, expected): ci = CategoricalIndex(values) assert ci.is_unique is expected - def test_duplicates(self): + def test_has_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') assert not idx.is_unique assert idx.has_duplicates + def test_drop_duplicates(self): + + idx = CategoricalIndex([0, 0, 0], name='foo') expected = CategoricalIndex([0], name='foo') tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 38f4b341116b8..2a9efd92df8a3 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -806,7 +806,7 @@ def test_explicit_conversions(self): result = a - fidx tm.assert_index_equal(result, expected) - def test_duplicates(self): + def test_has_duplicates(self): for ind in self.indices: if not len(ind): continue