@@ -37,39 +37,6 @@ def index(request):
3737 return request .param
3838
3939
40- def _check_equal (obj , ** kwargs ):
41- """
42- Check that hashing an objects produces the same value each time.
43-
44- Parameters
45- ----------
46- obj : object
47- The object to hash.
48- kwargs : kwargs
49- Keyword arguments to pass to the hashing function.
50- """
51- a = hash_pandas_object (obj , ** kwargs )
52- b = hash_pandas_object (obj , ** kwargs )
53- tm .assert_series_equal (a , b )
54-
55-
56- def _check_not_equal_with_index (obj ):
57- """
58- Check the hash of an object with and without its index is not the same.
59-
60- Parameters
61- ----------
62- obj : object
63- The object to hash.
64- """
65- if not isinstance (obj , Index ):
66- a = hash_pandas_object (obj , index = True )
67- b = hash_pandas_object (obj , index = False )
68-
69- if len (obj ):
70- assert not (a == b ).all ()
71-
72-
7340def test_consistency ():
7441 # Check that our hash doesn't change because of a mistake
7542 # in the actual code; this is the ground truth.
@@ -89,12 +56,10 @@ def test_hash_array(series):
8956 tm .assert_numpy_array_equal (hash_array (arr ), hash_array (arr ))
9057
9158
92- @pytest .mark .parametrize (
93- "arr2" , [np .array ([3 , 4 , "All" ], dtype = "U" ), np .array ([3 , 4 , "All" ], dtype = object )]
94- )
95- def test_hash_array_mixed (arr2 ):
@pytest.mark.parametrize("dtype", ["U", object])
def test_hash_array_mixed(dtype):
    # An array mixing integers with a string, built with the given dtype,
    # is expected to hash identically to its all-string counterpart.
    mixed = np.array([3, 4, "All"], dtype=dtype)
    as_strings = np.array(["3", "4", "All"])

    tm.assert_numpy_array_equal(hash_array(as_strings), hash_array(mixed))
10065
@@ -159,32 +124,77 @@ def test_multiindex_objects():
159124 Series (["a" , None , "c" ]),
160125 Series ([True , False , True ]),
161126 Series (dtype = object ),
162- Index ([1 , 2 , 3 ]),
163- Index ([True , False , True ]),
164127 DataFrame ({"x" : ["a" , "b" , "c" ], "y" : [1 , 2 , 3 ]}),
165128 DataFrame (),
166129 tm .makeMissingDataframe (),
167130 tm .makeMixedDataFrame (),
168131 tm .makeTimeDataFrame (),
169132 tm .makeTimeSeries (),
170- tm .makeTimedeltaIndex (),
171- tm .makePeriodIndex (),
172133 Series (tm .makePeriodIndex ()),
173134 Series (pd .date_range ("20130101" , periods = 3 , tz = "US/Eastern" )),
135+ ],
136+ )
137+ def test_hash_pandas_object (obj , index ):
138+ a = hash_pandas_object (obj , index = index )
139+ b = hash_pandas_object (obj , index = index )
140+ tm .assert_series_equal (a , b )
141+
142+
@pytest.mark.parametrize(
    "obj",
    [
        Series([1, 2, 3]),
        Series([1.0, 1.5, 3.2]),
        Series([1.0, 1.5, np.nan]),
        Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
        Series(["a", "b", "c"]),
        Series(["a", np.nan, "c"]),
        Series(["a", None, "c"]),
        Series([True, False, True]),
        DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        Series(tm.makePeriodIndex()),
        Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
    ],
)
def test_hash_pandas_object_diff_index_non_empty(obj):
    # For these non-empty objects, folding the index into the hash must
    # change at least one of the resulting values.
    with_index = hash_pandas_object(obj, index=True)
    without_index = hash_pandas_object(obj, index=False)

    hashes_identical = (with_index == without_index).all()
    assert not hashes_identical
167+
168+
@pytest.mark.parametrize(
    "obj",
    [
        Index([1, 2, 3]),
        Index([True, False, True]),
        tm.makeTimedeltaIndex(),
        tm.makePeriodIndex(),
        MultiIndex.from_product(
            [range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2)]
        ),
        MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]),
    ],
)
def test_hash_pandas_index(obj, index):
    # Hashing the same Index twice with the same ``index`` flag must give
    # identical results.
    first, second = (hash_pandas_object(obj, index=index) for _ in range(2))
    tm.assert_series_equal(first, second)
183186
184187
185- def test_hash_pandas_object2 (series , index ):
186- _check_equal (series , index = index )
187- _check_not_equal_with_index (series )
def test_hash_pandas_series(series, index):
    # Two hashing passes over the same Series with the same ``index`` flag
    # must agree.
    tm.assert_series_equal(
        hash_pandas_object(series, index=index),
        hash_pandas_object(series, index=index),
    )
192+
193+
def test_hash_pandas_series_diff_index(series):
    # Hash once with the index and once without; the two results must not
    # be equal in every position.
    hashes = {flag: hash_pandas_object(series, index=flag) for flag in (True, False)}
    assert not (hashes[True] == hashes[False]).all()
188198
189199
190200@pytest .mark .parametrize (
@@ -193,7 +203,9 @@ def test_hash_pandas_object2(series, index):
193203def test_hash_pandas_empty_object (obj , index ):
194204 # These are by-definition the same with
195205 # or without the index as the data is empty.
196- _check_equal (obj , index = index )
206+ a = hash_pandas_object (obj , index = index )
207+ b = hash_pandas_object (obj , index = index )
208+ tm .assert_series_equal (a , b )
197209
198210
199211@pytest .mark .parametrize (
@@ -235,11 +247,10 @@ def test_categorical_with_nan_consistency():
235247 assert result [1 ] in expected
236248
237249
238- @pytest .mark .parametrize ("obj" , [pd .Timestamp ("20130101" )])
239- def test_pandas_errors (obj ):
def test_pandas_errors():
    # hash_pandas_object is expected to reject a scalar Timestamp with a
    # TypeError carrying this message.
    unsupported = pd.Timestamp("20130101")
    with pytest.raises(TypeError, match="Unexpected type for hashing"):
        hash_pandas_object(unsupported)
243254
244255
245256def test_hash_keys ():
@@ -292,12 +303,16 @@ def test_invalid_key():
def test_already_encoded(index):
    # If already encoded, then ok: hashing a Series of utf8-encoded bytes
    # must still be repeatable.
    encoded = Series(list("abc")).str.encode("utf8")
    first_pass = hash_pandas_object(encoded, index=index)
    second_pass = hash_pandas_object(encoded, index=index)
    tm.assert_series_equal(first_pass, second_pass)
296309
297310
def test_alternate_encoding(index):
    """
    Check that hashing with a non-default encoding is repeatable.

    Parameters
    ----------
    index : bool
        Forwarded to ``hash_pandas_object`` as its ``index`` argument.
    """
    obj = Series(list("abc"))
    # Pass encoding="ascii" explicitly: without it this test would only
    # exercise the default encoding and would not cover an alternate one.
    a = hash_pandas_object(obj, index=index, encoding="ascii")
    b = hash_pandas_object(obj, index=index, encoding="ascii")
    tm.assert_series_equal(a, b)
301316
302317
303318@pytest .mark .parametrize ("l_exp" , range (8 ))
@@ -332,20 +347,24 @@ def test_hash_collisions():
332347 tm .assert_numpy_array_equal (result , np .concatenate ([expected1 , expected2 ], axis = 0 ))
333348
334349
335- def test_hash_with_tuple ():
@pytest.mark.parametrize(
    "data, result_data",
    [
        [[tuple("1"), tuple("2")], [10345501319357378243, 8331063931016360761]],
        [[(1,), (2,)], [9408946347443669104, 3278256261030523334]],
    ],
)
def test_hash_with_tuple(data, result_data):
    # GH#28969 array containing a tuple raises on call to arr.astype(str)
    # apparently a numpy bug github.com/numpy/numpy/issues/9441

    # Hash a single-column frame of tuples and compare against the pinned
    # uint64 values supplied by the parametrization.
    frame = DataFrame({"data": data})
    hashed = hash_pandas_object(frame)
    tm.assert_series_equal(hashed, Series(result_data, dtype=np.uint64))
343365
344- df2 = DataFrame ({"data" : [(1 ,), (2 ,)]})
345- result = hash_pandas_object (df2 )
346- expected = Series ([9408946347443669104 , 3278256261030523334 ], dtype = np .uint64 )
347- tm .assert_series_equal (result , expected )
348366
367+ def test_hashable_tuple_args ():
349368 # require that the elements of such tuples are themselves hashable
350369
351370 df3 = DataFrame (
0 commit comments