13
13
// is 64 bits the truncation causes collision issues. Given all that, we use our own
14
14
// simple hash, viewing the double bytes as an int64 and using khash's default
15
15
// hash for 64 bit integers.
16
- // GH 13436
16
+ // GH 13436 showed that _Py_HashDouble doesn't work well with khash
17
+ // GH 28303 showed that the simple xoring-version isn't good enough
18
+ // thus murmur2-hash is used
19
+
20
+
21
+ // specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
22
+ // it is possible to have a special x64-version, which would need fewer operations, but
23
+ // always using the 32bit version also has some benefits:
24
+ // - one code for 32bit and 64bit builds
25
+ // - the same case for 32bit and 64bit builds
26
+ // - no performance difference could be measured compared to a possible x64-version
27
+
28
+ khint32_t PANDAS_INLINE murmur2_32_32to32 (khint32_t k1 , khint32_t k2 ){
29
+ const khint32_t SEED = 0xc70f6907UL ;
30
+ // 'm' and 'r' are mixing constants generated offline.
31
+ // They're not really 'magic', they just happen to work well.
32
+ const khint32_t M_32 = 0x5bd1e995 ;
33
+ const int R_32 = 24 ;
34
+
35
+ // Initialize the hash to a 'random' value
36
+ khint32_t h = SEED ^ 4 ;
37
+
38
+ //handle first 4 bytes:
39
+ k1 *= M_32 ;
40
+ k1 ^= k1 >> R_32 ;
41
+ k1 *= M_32 ;
42
+
43
+ h *= M_32 ;
44
+ h ^= k1 ;
45
+
46
+ //handle second 4 bytes:
47
+ k2 *= M_32 ;
48
+ k2 ^= k2 >> R_32 ;
49
+ k2 *= M_32 ;
50
+
51
+ h *= M_32 ;
52
+ h ^= k2 ;
53
+
54
+ // Do a few final mixes of the hash to ensure the "last few
55
+ // bytes" are well-incorporated.
56
+ h ^= h >> 13 ;
57
+ h *= M_32 ;
58
+ h ^= h >> 15 ;
59
+ return h ;
60
+ }
61
+
62
+ khint32_t PANDAS_INLINE murmur2_64to32 (khint64_t k ){
63
+ khint32_t k1 = (khint32_t )k ;
64
+ khint32_t k2 = (khint32_t )(k >>32 );
65
+
66
+ return murmur2_32_32to32 (k1 , k2 );
67
+ }
68
+
17
69
// Reinterpret the bytes of a double as a khint64_t.
// memcpy is used for the copy so that no pointer of a different type is
// ever dereferenced; exactly sizeof(double) bytes are transferred.
khint64_t PANDAS_INLINE asint64(double key) {
    khint64_t bits;
    memcpy(&bits, &key, sizeof key);
    return bits;
}
22
74
23
- // correct for all inputs but not -0.0 and NaNs
24
- #define kh_float64_hash_func_0_NAN (key ) (khint32_t)((asint64(key))>>33^(asint64(key))^(asint64(key))<<11)
25
75
26
- // correct for all inputs but not NaNs
27
- #define kh_float64_hash_func_NAN (key ) ((key) == 0.0 ? \
28
- kh_float64_hash_func_0_NAN(0.0) : \
29
- kh_float64_hash_func_0_NAN(key))
76
+ #define ZERO_HASH 0
77
+ #define NAN_HASH 0
78
+
79
+ khint32_t PANDAS_INLINE kh_float64_hash_func (double val ){
80
+ if (val == 0.0 ){
81
+ return ZERO_HASH ;
82
+ }
83
+ if (val != val ){
84
+ return NAN_HASH ;
85
+ }
86
+ khint64_t as_int = asint64 (val );
87
+ return murmur2_64to32 (as_int );
88
+ }
30
89
31
- // correct for all
32
- #define kh_float64_hash_func (key ) ((key) != (key) ? \
33
- kh_float64_hash_func_NAN(Py_NAN) : \
34
- kh_float64_hash_func_NAN(key))
35
90
36
91
#define kh_float64_hash_equal (a , b ) ((a) == (b) || ((b) != (b) && (a) != (a)))
37
92
@@ -121,4 +176,4 @@ void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) {
121
176
122
177
// Resize a str_starts table: the request is forwarded unchanged to
// kh_resize_str, applied to the string table held in table->table.
void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t *table, khint_t val) {
    kh_resize_str(table->table, val);
}
0 commit comments