@@ -37,39 +37,6 @@ def index(request):
37
37
return request .param
38
38
39
39
40
- def _check_equal (obj , ** kwargs ):
41
- """
42
- Check that hashing an objects produces the same value each time.
43
-
44
- Parameters
45
- ----------
46
- obj : object
47
- The object to hash.
48
- kwargs : kwargs
49
- Keyword arguments to pass to the hashing function.
50
- """
51
- a = hash_pandas_object (obj , ** kwargs )
52
- b = hash_pandas_object (obj , ** kwargs )
53
- tm .assert_series_equal (a , b )
54
-
55
-
56
- def _check_not_equal_with_index (obj ):
57
- """
58
- Check the hash of an object with and without its index is not the same.
59
-
60
- Parameters
61
- ----------
62
- obj : object
63
- The object to hash.
64
- """
65
- if not isinstance (obj , Index ):
66
- a = hash_pandas_object (obj , index = True )
67
- b = hash_pandas_object (obj , index = False )
68
-
69
- if len (obj ):
70
- assert not (a == b ).all ()
71
-
72
-
73
40
def test_consistency ():
74
41
# Check that our hash doesn't change because of a mistake
75
42
# in the actual code; this is the ground truth.
@@ -89,12 +56,10 @@ def test_hash_array(series):
89
56
tm .assert_numpy_array_equal (hash_array (arr ), hash_array (arr ))
90
57
91
58
92
@pytest.mark.parametrize("dtype", ["U", object])
def test_hash_array_mixed(dtype):
    """
    Compare the hash of an all-string array with a mixed int/str array.

    Parameters
    ----------
    dtype : str or type
        dtype used to build the mixed ``[3, 4, "All"]`` array.
    """
    from_strings = hash_array(np.array(["3", "4", "All"]))

    mixed_values = np.array([3, 4, "All"], dtype=dtype)
    from_mixed = hash_array(mixed_values)

    # Both inputs are expected to hash to the same uint64 values.
    tm.assert_numpy_array_equal(from_strings, from_mixed)
100
65
@@ -159,32 +124,77 @@ def test_multiindex_objects():
159
124
Series (["a" , None , "c" ]),
160
125
Series ([True , False , True ]),
161
126
Series (dtype = object ),
162
- Index ([1 , 2 , 3 ]),
163
- Index ([True , False , True ]),
164
127
DataFrame ({"x" : ["a" , "b" , "c" ], "y" : [1 , 2 , 3 ]}),
165
128
DataFrame (),
166
129
tm .makeMissingDataframe (),
167
130
tm .makeMixedDataFrame (),
168
131
tm .makeTimeDataFrame (),
169
132
tm .makeTimeSeries (),
170
- tm .makeTimedeltaIndex (),
171
- tm .makePeriodIndex (),
172
133
Series (tm .makePeriodIndex ()),
173
134
Series (pd .date_range ("20130101" , periods = 3 , tz = "US/Eastern" )),
135
+ ],
136
+ )
137
+ def test_hash_pandas_object (obj , index ):
138
+ a = hash_pandas_object (obj , index = index )
139
+ b = hash_pandas_object (obj , index = index )
140
+ tm .assert_series_equal (a , b )
141
+
142
+
143
@pytest.mark.parametrize(
    "obj",
    [
        Series([1, 2, 3]),
        Series([1.0, 1.5, 3.2]),
        Series([1.0, 1.5, np.nan]),
        Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
        Series(["a", "b", "c"]),
        Series(["a", np.nan, "c"]),
        Series(["a", None, "c"]),
        Series([True, False, True]),
        DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        Series(tm.makePeriodIndex()),
        Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
    ],
)
def test_hash_pandas_object_diff_index_non_empty(obj):
    """
    Check that hashing with and without the index gives different results.

    Parameters
    ----------
    obj : Series or DataFrame
        The (non-empty) object to hash.
    """
    with_index = hash_pandas_object(obj, index=True)
    without_index = hash_pandas_object(obj, index=False)

    # At least one row hash has to differ between the two modes.
    hashes_match = (with_index == without_index).all()
    assert not hashes_match
167
+
168
+
169
@pytest.mark.parametrize(
    "obj",
    [
        Index([1, 2, 3]),
        Index([True, False, True]),
        tm.makeTimedeltaIndex(),
        tm.makePeriodIndex(),
        MultiIndex.from_product(
            [range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2)]
        ),
        MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]),
    ],
)
def test_hash_pandas_index(obj, index):
    """
    Check that hashing an Index / MultiIndex twice gives the same result.

    Parameters
    ----------
    obj : Index or MultiIndex
        The object to hash.
    index : fixture value
        Forwarded as ``index=`` to ``hash_pandas_object``.
    """
    first_pass, second_pass = (
        hash_pandas_object(obj, index=index) for _ in range(2)
    )
    tm.assert_series_equal(first_pass, second_pass)
183
186
184
187
185
def test_hash_pandas_series(series, index):
    """
    Check that hashing the ``series`` fixture twice gives the same result.

    Parameters
    ----------
    series : fixture value
        The object to hash.
    index : fixture value
        Forwarded as ``index=`` to ``hash_pandas_object``.
    """
    hash_kwargs = {"index": index}
    first_pass = hash_pandas_object(series, **hash_kwargs)
    second_pass = hash_pandas_object(series, **hash_kwargs)
    tm.assert_series_equal(first_pass, second_pass)
192
+
193
+
194
def test_hash_pandas_series_diff_index(series):
    """
    Check that hashing with and without the index gives different results.

    Parameters
    ----------
    series : fixture value
        The object to hash.
    """
    with_index = hash_pandas_object(series, index=True)
    without_index = hash_pandas_object(series, index=False)

    # At least one element hash has to differ between the two modes.
    hashes_match = (with_index == without_index).all()
    assert not hashes_match
188
198
189
199
190
200
@pytest .mark .parametrize (
@@ -193,7 +203,9 @@ def test_hash_pandas_object2(series, index):
193
203
def test_hash_pandas_empty_object (obj , index ):
194
204
# These are by-definition the same with
195
205
# or without the index as the data is empty.
196
- _check_equal (obj , index = index )
206
+ a = hash_pandas_object (obj , index = index )
207
+ b = hash_pandas_object (obj , index = index )
208
+ tm .assert_series_equal (a , b )
197
209
198
210
199
211
@pytest .mark .parametrize (
@@ -235,11 +247,10 @@ def test_categorical_with_nan_consistency():
235
247
assert result [1 ] in expected
236
248
237
249
238
def test_pandas_errors():
    """Check that hashing a ``pd.Timestamp`` raises ``TypeError``."""
    unsupported = pd.Timestamp("20130101")
    msg = "Unexpected type for hashing"

    with pytest.raises(TypeError, match=msg):
        hash_pandas_object(unsupported)
243
254
244
255
245
256
def test_hash_keys ():
@@ -292,12 +303,16 @@ def test_invalid_key():
292
303
def test_already_encoded(index):
    """
    Check that a Series of utf8-encoded bytes hashes to the same value twice.

    Parameters
    ----------
    index : fixture value
        Forwarded as ``index=`` to ``hash_pandas_object``.
    """
    # If already encoded, then ok.
    encoded = Series(list("abc")).str.encode("utf8")

    first_pass, second_pass = (
        hash_pandas_object(encoded, index=index) for _ in range(2)
    )
    tm.assert_series_equal(first_pass, second_pass)
296
309
297
310
298
311
def test_alternate_encoding(index):
    """
    Check that hashing with a non-default encoding is repeatable.

    Parameters
    ----------
    index : fixture value
        Forwarded as ``index=`` to ``hash_pandas_object``.
    """
    obj = Series(list("abc"))

    # ``encoding`` must be passed explicitly on both calls: without it the
    # hash falls back to the function's default encoding and this test would
    # not exercise an alternate one at all.
    a = hash_pandas_object(obj, index=index, encoding="ascii")
    b = hash_pandas_object(obj, index=index, encoding="ascii")
    tm.assert_series_equal(a, b)
301
316
302
317
303
318
@pytest .mark .parametrize ("l_exp" , range (8 ))
@@ -332,20 +347,24 @@ def test_hash_collisions():
332
347
tm .assert_numpy_array_equal (result , np .concatenate ([expected1 , expected2 ], axis = 0 ))
333
348
334
349
335
@pytest.mark.parametrize(
    "data, result_data",
    [
        [[tuple("1"), tuple("2")], [10345501319357378243, 8331063931016360761]],
        [[(1,), (2,)], [9408946347443669104, 3278256261030523334]],
    ],
)
def test_hash_with_tuple(data, result_data):
    """
    Check the hash of a DataFrame column that holds tuples.

    Parameters
    ----------
    data : list of tuple
        Values placed in the single ``"data"`` column.
    result_data : list of int
        Expected hash values, compared as ``uint64``.
    """
    # GH#28969 array containing a tuple raises on call to arr.astype(str)
    #  apparently a numpy bug github.com/numpy/numpy/issues/9441

    frame = DataFrame({"data": data})
    expected = Series(result_data, dtype=np.uint64)

    tm.assert_series_equal(hash_pandas_object(frame), expected)
348
366
367
+ def test_hashable_tuple_args ():
349
368
# require that the elements of such tuples are themselves hashable
350
369
351
370
df3 = DataFrame (
0 commit comments