// is 64 bits the truncation causes collision issues.  Given all that, we use our own
// simple hash, viewing the double bytes as an int64 and using khash's default
// hash for 64 bit integers.
16- // GH 13436
// GH 13436 showed that _Py_HashDouble doesn't work well with khash
// GH 28303 showed that the simple xoring-version isn't good enough,
// thus murmur2-hash is used
19+
20+
// specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
// it is possible to have a special x64-version, which would need fewer operations, but
// always using the 32bit version also has some benefits:
//    - one code for 32bit and 64bit builds
//    - the same case for 32bit and 64bit builds
//    - no performance difference could be measured compared to a possible x64-version
27+
28+ khint32_t PANDAS_INLINE murmur2_32_32to32 (khint32_t k1 , khint32_t k2 ){
29+ const khint32_t SEED = 0xc70f6907UL ;
30+ // 'm' and 'r' are mixing constants generated offline.
31+ // They're not really 'magic', they just happen to work well.
32+ const khint32_t M_32 = 0x5bd1e995 ;
33+ const int R_32 = 24 ;
34+
35+ // Initialize the hash to a 'random' value
36+ khint32_t h = SEED ^ 4 ;
37+
38+ //handle first 4 bytes:
39+ k1 *= M_32 ;
40+ k1 ^= k1 >> R_32 ;
41+ k1 *= M_32 ;
42+
43+ h *= M_32 ;
44+ h ^= k1 ;
45+
46+ //handle second 4 bytes:
47+ k2 *= M_32 ;
48+ k2 ^= k2 >> R_32 ;
49+ k2 *= M_32 ;
50+
51+ h *= M_32 ;
52+ h ^= k2 ;
53+
54+ // Do a few final mixes of the hash to ensure the "last few
55+ // bytes" are well-incorporated.
56+ h ^= h >> 13 ;
57+ h *= M_32 ;
58+ h ^= h >> 15 ;
59+ return h ;
60+ }
61+
62+ khint32_t PANDAS_INLINE murmur2_64to32 (khint64_t k ){
63+ khint32_t k1 = (khint32_t )k ;
64+ khint32_t k2 = (khint32_t )(k >>32 );
65+
66+ return murmur2_32_32to32 (k1 , k2 );
67+ }
68+
1769khint64_t PANDAS_INLINE asint64 (double key ) {
18- khint64_t val ;
19- memcpy (& val , & key , sizeof (double ));
20- return val ;
70+ khint64_t val ;
71+ memcpy (& val , & key , sizeof (double ));
72+ return val ;
2173}
2274
23- // correct for all inputs but not -0.0 and NaNs
24- #define kh_float64_hash_func_0_NAN (key ) (khint32_t)((asint64(key))>>33^(asint64(key))^(asint64(key))<<11)
2575
26- // correct for all inputs but not NaNs
27- #define kh_float64_hash_func_NAN (key ) ((key) == 0.0 ? \
28- kh_float64_hash_func_0_NAN(0.0) : \
29- kh_float64_hash_func_0_NAN(key))
76+ #define ZERO_HASH 0
77+ #define NAN_HASH 0
78+
79+ khint32_t PANDAS_INLINE kh_float64_hash_func (double val ){
80+ if (val == 0.0 ){
81+ return ZERO_HASH ;
82+ }
83+ if (val != val ){
84+ return NAN_HASH ;
85+ }
86+ khint64_t as_int = asint64 (val );
87+ return murmur2_64to32 (as_int );
88+ }
3089
31- // correct for all
32- #define kh_float64_hash_func (key ) ((key) != (key) ? \
33- kh_float64_hash_func_NAN(Py_NAN) : \
34- kh_float64_hash_func_NAN(key))
3590
// Key equality for doubles: true when a == b, or when both a and b are NaN
// (a value is NaN exactly when it is unequal to itself).
// Note: there must be no whitespace between the macro name and '(' --
// otherwise this becomes an object-like macro.  Arguments may be evaluated
// more than once, so they must be free of side effects.
#define kh_float64_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a)))
3792
@@ -121,4 +176,4 @@ void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) {
121176
122177void PANDAS_INLINE kh_resize_str_starts (kh_str_starts_t * table , khint_t val ) {
123178 kh_resize_str (table -> table , val );
124- }
179+ }
0 commit comments