From 0f4175d359afe01a3e40cd8ec1c7b0c7e0618f26 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 13 Jul 2018 22:43:16 +0200 Subject: [PATCH 1/4] TST/CLN: correctly skip in indexes/common; add test for duplicated --- pandas/tests/indexes/common.py | 53 ++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index bb82d5578481b..a126df10c3b3b 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -37,7 +37,7 @@ def verify_pickle(self, indices): def test_pickle_compat_construction(self): # this is testing for pickle compat if self._holder is None: - return + pytest.skip('Skip check for uncertain type') # need an object to create with pytest.raises(TypeError, self._holder) @@ -236,7 +236,7 @@ def test_set_name_methods(self, indices): # don't tests a MultiIndex here (as its tested separated) if isinstance(indices, MultiIndex): - return + pytest.skip('Skip check for MultiIndex') original_name = indices.name new_ind = indices.set_names([new_name]) assert new_ind.name == new_name @@ -333,7 +333,8 @@ def test_copy_and_deepcopy(self, indices): from copy import copy, deepcopy if isinstance(indices, MultiIndex): - return + pytest.skip('Skip check for MultiIndex') + for func in (copy, deepcopy): idx_copy = func(indices) assert idx_copy is not indices @@ -344,18 +345,46 @@ def test_copy_and_deepcopy(self, indices): def test_duplicates(self, indices): if type(indices) is not self._holder: - return + pytest.skip('Can only check if we have the correct type') if not len(indices) or isinstance(indices, MultiIndex): - return + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates + pytest.skip('Skip check for empty Index and MultiIndex') + idx = self._holder([indices[0]] * 5) assert not idx.is_unique assert idx.has_duplicates + @pytest.mark.parametrize('keep', ['first', 'last', False]) + def test_duplicated(self, indices, keep): + if type(indices) is not self._holder: + pytest.skip('Can only check if we know the index type') + if not len(indices) or isinstance(indices, MultiIndex): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates + pytest.skip('Skip check for empty Index and MultiIndex') + + idx = self._holder(indices) + if idx.has_duplicates: + # We need to be able to control creation of duplicates here + # This is slightly circular, as drop_duplicates depends on + # duplicated, but in the end, it all works out because we + # cross-check with Series.duplicated + idx = idx.drop_duplicates() + + n, k = len(idx), 10 + duplicated_selection = np.random.choice(n, k * n) + expected = pd.Series(duplicated_selection).duplicated(keep=keep).values + idx = self._holder(idx.values[duplicated_selection]) + + result = idx.duplicated(keep=keep) + tm.assert_numpy_array_equal(result, expected) + def test_unique(self, indices): # don't test a MultiIndex here (as its tested separated) # don't test a CategoricalIndex because categories change (GH 18291) if isinstance(indices, (MultiIndex, CategoricalIndex)): - return + pytest.skip('Skip check for MultiIndex/CategoricalIndex') # GH 17896 expected = indices.drop_duplicates() @@ -375,7 +404,7 @@ def test_unique_na(self): def test_get_unique_index(self, indices): # MultiIndex tested separately if not len(indices) or isinstance(indices, MultiIndex): - return + pytest.skip('Skip check for empty Index and MultiIndex') idx = indices[[0] * 5] idx_unique = indices[[0]] @@ -394,7 +423,7 @@ def test_get_unique_index(self, indices): # nans: if not indices._can_hold_na: - return + pytest.skip('Skip na-check if index cannot hold na') if needs_i8_conversion(indices): vals = indices.asi8[[0] * 5] @@ -423,7 +452,7 @@ def test_sort(self, indices): def test_mutability(self, indices): if not len(indices): - return + pytest.skip('Skip check for empty Index') pytest.raises(TypeError, indices.__setitem__, 0, indices[0]) def test_view(self, indices): @@ -761,7 +790,7 @@ def test_equals_op(self): # GH9947, GH10637 index_a = self.create_index() if isinstance(index_a, PeriodIndex): - return + pytest.skip('Skip check for PeriodIndex') n = len(index_a) index_b = index_a[0:-1] @@ -989,11 +1018,11 @@ def test_searchsorted_monotonic(self, indices): # not implemented for tuple searches in MultiIndex # or Intervals searches in IntervalIndex if isinstance(indices, (MultiIndex, IntervalIndex)): - return + pytest.skip('Skip check for MultiIndex/IntervalIndex') # nothing to test if the index is empty if indices.empty: - return + pytest.skip('Skip check for empty Index') value = indices[0] # determine the expected results (handle dupes for 'right') From 4ab753a037ea6b1e74f33a96ff7365fec2ac45a1 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 14 Jul 2018 17:07:37 +0200 Subject: [PATCH 2/4] Rename ambiguous test names --- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/test_category.py | 5 ++++- pandas/tests/indexes/test_range.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index a126df10c3b3b..3da30230ec375 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -343,7 +343,7 @@ def test_copy_and_deepcopy(self, indices): new_copy = indices.copy(deep=True, name="banana") assert new_copy.name == "banana" - def test_duplicates(self, indices): + def test_has_duplicates(self, indices): if type(indices) is not self._holder: pytest.skip('Can only check if we have the correct type') if not len(indices) or isinstance(indices, MultiIndex): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index a2a4170256088..2221fd023b561 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -590,12 +590,15 @@ def test_is_unique(self, values, expected): ci = CategoricalIndex(values) assert ci.is_unique is expected - def test_duplicates(self): + def test_has_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') assert not idx.is_unique assert idx.has_duplicates + def test_drop_duplicates(self): + + idx = CategoricalIndex([0, 0, 0], name='foo') expected = CategoricalIndex([0], name='foo') tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 38f4b341116b8..2a9efd92df8a3 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -806,7 +806,7 @@ def test_explicit_conversions(self): result = a - fidx tm.assert_index_equal(result, expected) - def test_duplicates(self): + def test_has_duplicates(self): for ind in self.indices: if not len(ind): continue From b6e2858b0942aafa6f4b80b15255fe5314d66ea8 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 16 Jul 2018 18:11:54 +0200 Subject: [PATCH 3/4] Incorporate review (jreback) --- pandas/tests/indexes/common.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 3da30230ec375..a2785d53cc4fc 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -366,10 +366,12 @@ def test_duplicated(self, indices, keep): idx = self._holder(indices) if idx.has_duplicates: - # We need to be able to control creation of duplicates here - # This is slightly circular, as drop_duplicates depends on - # duplicated, but in the end, it all works out because we - # cross-check with Series.duplicated + # We are testing the duplicated-method here, so we need to know + # exactly which indices are duplicate and how (for the result). + # This is not possible if "idx" has duplicates already, which we + # therefore remove. This is seemingly circular, as drop_duplicates + # invokes duplicated, but in the end, it all works out because we + # cross-check with Series.duplicated, which is tested separately. idx = idx.drop_duplicates() n, k = len(idx), 10 From f9c9aab75b63bf3367f5c9818c3f4a833efbe393 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 5 Aug 2018 19:12:19 +0200 Subject: [PATCH 4/4] Review; remove unhit instance-check --- pandas/tests/indexes/common.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index a2785d53cc4fc..56f59851d6d04 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -35,10 +35,6 @@ def verify_pickle(self, indices): assert indices.equals(unpickled) def test_pickle_compat_construction(self): - # this is testing for pickle compat - if self._holder is None: - pytest.skip('Skip check for uncertain type') - # need an object to create with pytest.raises(TypeError, self._holder)