Skip to content

Commit ca451bf

Browse files
authored
[mono] Switch simdhash to prime bucket counts; increase load factor
Add some measurements for simdhash ght Introduce scan data that's fetched once per bucket Targeted simdhash optimizations via new dn_simdhash_ght_get_value_or_default Switch to spaced prime bucket counts instead of power of two bucket counts for better collision resistance Fix cache alignment of ght compatible and ptr ptr simdhash variants on 64-bit Higher simdhash load factor (closer to ghashtable's; improves performance for bad hashes) Add sequential keys measurements Increase load factor to 30%
1 parent 15bcd62 commit ca451bf

18 files changed

+441
-69
lines changed

src/mono/mono/metadata/bundled-resources.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,9 +204,7 @@ bundled_resources_get (const char *id)
204204
char key_buffer[1024];
205205
key_from_id(id, key_buffer, sizeof(key_buffer));
206206

207-
MonoBundledResource *result = NULL;
208-
dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result);
209-
return result;
207+
return (MonoBundledResource *)dn_simdhash_ght_get_value_or_default (bundled_resources, key_buffer);
210208
}
211209

212210
//---------------------------------------------------------------------------------------

src/mono/mono/metadata/class.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,7 +1252,7 @@ mono_class_inflate_generic_method_full_checked (MonoMethod *method, MonoClass *k
12521252
mono_mem_manager_lock (mm);
12531253
if (!mm->gmethod_cache)
12541254
mm->gmethod_cache = dn_simdhash_ght_new_full (inflated_method_hash, inflated_method_equal, NULL, (GDestroyNotify)free_inflated_method, 0, NULL);
1255-
dn_simdhash_ght_try_get_value (mm->gmethod_cache, iresult, (void **)&cached);
1255+
cached = dn_simdhash_ght_get_value_or_default (mm->gmethod_cache, iresult);
12561256
mono_mem_manager_unlock (mm);
12571257

12581258
if (cached) {
@@ -1358,8 +1358,7 @@ mono_class_inflate_generic_method_full_checked (MonoMethod *method, MonoClass *k
13581358

13591359
// check cache
13601360
mono_mem_manager_lock (mm);
1361-
cached = NULL;
1362-
dn_simdhash_ght_try_get_value (mm->gmethod_cache, iresult, (void **)&cached);
1361+
cached = dn_simdhash_ght_get_value_or_default (mm->gmethod_cache, iresult);
13631362
if (!cached) {
13641363
dn_simdhash_ght_insert (mm->gmethod_cache, iresult, iresult);
13651364
iresult->owner = mm;

src/mono/mono/metadata/metadata.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3359,7 +3359,7 @@ mono_metadata_get_inflated_signature (MonoMethodSignature *sig, MonoGenericConte
33593359
mm->gsignature_cache = dn_simdhash_ght_new_full (inflated_signature_hash, inflated_signature_equal, NULL, (GDestroyNotify)free_inflated_signature, 256, NULL);
33603360

33613361
// FIXME: The lookup is done on the newly allocated sig so it always fails
3362-
dn_simdhash_ght_try_get_value (mm->gsignature_cache, &helper, (gpointer *)&res);
3362+
res = dn_simdhash_ght_get_value_or_default (mm->gsignature_cache, &helper);
33633363
if (!res) {
33643364
res = mono_mem_manager_alloc0 (mm, sizeof (MonoInflatedMethodSignature));
33653365
// FIXME: sig is an inflated signature not owned by the mem manager

src/mono/mono/mini/interp/transform.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2736,7 +2736,7 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
27362736
default: return FALSE;
27372737
}
27382738
}
2739-
}
2739+
}
27402740

27412741
return FALSE;
27422742
}
@@ -10036,8 +10036,7 @@ mono_interp_transform_method (InterpMethod *imethod, ThreadContext *context, Mon
1003610036

1003710037
// FIXME Publishing of seq points seems to be racy with tiereing. We can have both tiered and untiered method
1003810038
// running at the same time. We could therefore get the optimized imethod seq points for the unoptimized method.
10039-
gpointer seq_points = NULL;
10040-
dn_simdhash_ght_try_get_value (jit_mm->seq_points, imethod->method, (void **)&seq_points);
10039+
gpointer seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, imethod->method);
1004110040
if (!seq_points || seq_points != imethod->jinfo->seq_points)
1004210041
dn_simdhash_ght_replace (jit_mm->seq_points, imethod->method, imethod->jinfo->seq_points);
1004310042
}

src/mono/mono/mini/seq-points.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,12 @@ mono_get_seq_points (MonoMethod *method)
272272
// FIXME:
273273
jit_mm = get_default_jit_mm ();
274274
jit_mm_lock (jit_mm);
275-
dn_simdhash_ght_try_get_value (jit_mm->seq_points, method, (void **)&seq_points);
275+
seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, method);
276276
if (!seq_points && method->is_inflated) {
277277
/* generic sharing + aot */
278-
dn_simdhash_ght_try_get_value (jit_mm->seq_points, declaring_generic_method, (void **)&seq_points);
278+
seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, declaring_generic_method);
279279
if (!seq_points)
280-
dn_simdhash_ght_try_get_value (jit_mm->seq_points, shared_method, (void **)&seq_points);
280+
seq_points = dn_simdhash_ght_get_value_or_default (jit_mm->seq_points, shared_method);
281281
}
282282
jit_mm_unlock (jit_mm);
283283

src/native/containers/dn-simdhash-ght-compatible.c

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,6 @@ typedef struct dn_simdhash_ght_data {
1616
dn_simdhash_ght_destroy_func value_destroy_func;
1717
} dn_simdhash_ght_data;
1818

19-
static inline uint32_t
20-
dn_simdhash_ght_hash (dn_simdhash_ght_data data, void * key)
21-
{
22-
dn_simdhash_ght_hash_func hash_func = data.hash_func;
23-
if (hash_func)
24-
return (uint32_t)hash_func(key);
25-
else
26-
// FIXME: Seed
27-
return MurmurHash3_32_ptr(key, 0);
28-
}
29-
30-
static inline int32_t
31-
dn_simdhash_ght_equals (dn_simdhash_ght_data data, void * lhs, void * rhs)
32-
{
33-
dn_simdhash_ght_equal_func equal_func = data.key_equal_func;
34-
if (equal_func)
35-
return equal_func(lhs, rhs);
36-
else
37-
return lhs == rhs;
38-
}
39-
4019
static inline void
4120
dn_simdhash_ght_removed (dn_simdhash_ght_data data, void * key, void * value)
4221
{
@@ -68,26 +47,53 @@ dn_simdhash_ght_replaced (dn_simdhash_ght_data data, void * old_key, void * new_
6847
#define DN_SIMDHASH_KEY_T void *
6948
#define DN_SIMDHASH_VALUE_T void *
7049
#define DN_SIMDHASH_INSTANCE_DATA_T dn_simdhash_ght_data
71-
#define DN_SIMDHASH_KEY_HASHER dn_simdhash_ght_hash
72-
#define DN_SIMDHASH_KEY_EQUALS dn_simdhash_ght_equals
50+
51+
#define DN_SIMDHASH_SCAN_DATA_T dn_simdhash_ght_equal_func
52+
#define DN_SIMDHASH_GET_SCAN_DATA(data) data.key_equal_func
53+
54+
#define DN_SIMDHASH_KEY_HASHER(data, key) (uint32_t)data.hash_func(key)
55+
#define DN_SIMDHASH_KEY_EQUALS(scan_data, lhs, rhs) scan_data(lhs, rhs)
56+
7357
#define DN_SIMDHASH_ON_REMOVE dn_simdhash_ght_removed
7458
#define DN_SIMDHASH_ON_REPLACE dn_simdhash_ght_replaced
59+
// perfect cache alignment. 128-byte buckets for 64-bit pointers, 64-byte buckets for 32-bit pointers
7560
#if SIZEOF_VOID_P == 8
76-
#define DN_SIMDHASH_BUCKET_CAPACITY 11
61+
#define DN_SIMDHASH_BUCKET_CAPACITY 14
7762
#else
7863
#define DN_SIMDHASH_BUCKET_CAPACITY 12
7964
#endif
8065
#define DN_SIMDHASH_NO_DEFAULT_NEW 1
8166

8267
#include "dn-simdhash-specialization.h"
8368

69+
static unsigned int
70+
dn_simdhash_ght_default_hash (const void * key)
71+
{
72+
// You might think we should avalanche the key bits but in my testing, it doesn't help.
73+
// Right now the default hash function is rarely used anyway
74+
return (unsigned int)(size_t)key;
75+
}
76+
77+
static int32_t
78+
dn_simdhash_ght_default_comparer (const void * a, const void * b)
79+
{
80+
return a == b;
81+
}
82+
8483
dn_simdhash_ght_t *
8584
dn_simdhash_ght_new (
8685
dn_simdhash_ght_hash_func hash_func, dn_simdhash_ght_equal_func key_equal_func,
8786
uint32_t capacity, dn_allocator_t *allocator
8887
)
8988
{
9089
dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator);
90+
// Most users of dn_simdhash_ght are passing a custom comparer, and always doing an indirect call ends up being faster
91+
// than conditionally doing a fast inline check when there's no comparer set. Somewhat counter-intuitive, but true
92+
// on both x64 and arm64. Probably due to the smaller code size and reduced branch predictor pressure.
93+
if (!hash_func)
94+
hash_func = dn_simdhash_ght_default_hash;
95+
if (!key_equal_func)
96+
key_equal_func = dn_simdhash_ght_default_comparer;
9197
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func;
9298
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func;
9399
return hash;
@@ -101,6 +107,10 @@ dn_simdhash_ght_new_full (
101107
)
102108
{
103109
dn_simdhash_ght_t *hash = dn_simdhash_new_internal(&DN_SIMDHASH_T_META, DN_SIMDHASH_T_VTABLE, capacity, allocator);
110+
if (!hash_func)
111+
hash_func = dn_simdhash_ght_default_hash;
112+
if (!key_equal_func)
113+
key_equal_func = dn_simdhash_ght_default_comparer;
104114
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).hash_func = hash_func;
105115
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_equal_func = key_equal_func;
106116
dn_simdhash_instance_data(dn_simdhash_ght_data, hash).key_destroy_func = key_destroy_func;
@@ -145,3 +155,17 @@ dn_simdhash_ght_insert_replace (
145155
return;
146156
}
147157
}
158+
159+
void *
160+
dn_simdhash_ght_get_value_or_default (
161+
dn_simdhash_ght_t *hash, void * key
162+
)
163+
{
164+
check_self(hash);
165+
uint32_t key_hash = DN_SIMDHASH_KEY_HASHER(DN_SIMDHASH_GET_DATA(hash), key);
166+
DN_SIMDHASH_VALUE_T *value_ptr = DN_SIMDHASH_FIND_VALUE_INTERNAL(hash, key, key_hash);
167+
if (value_ptr)
168+
return *value_ptr;
169+
else
170+
return NULL;
171+
}

src/native/containers/dn-simdhash-ght-compatible.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ dn_simdhash_ght_insert_replace (
2828
int32_t overwrite_key
2929
);
3030

31+
// faster and simpler to use than dn_simdhash_ght_try_get_value, use it wisely
32+
void *
33+
dn_simdhash_ght_get_value_or_default (
34+
dn_simdhash_ght_t *hash, void * key
35+
);
36+
3137
// compatibility shims for the g_hash_table_ versions in glib.h
3238
#define dn_simdhash_ght_insert(h,k,v) dn_simdhash_ght_insert_replace ((h),(k),(v),FALSE)
3339
#define dn_simdhash_ght_replace(h,k,v) dn_simdhash_ght_insert_replace ((h),(k),(v),TRUE)

src/native/containers/dn-simdhash-ptr-ptr.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
#define DN_SIMDHASH_T dn_simdhash_ptr_ptr
1212
#define DN_SIMDHASH_KEY_T void *
1313
#define DN_SIMDHASH_VALUE_T void *
14-
#define DN_SIMDHASH_KEY_HASHER(hash, key) (MurmurHash3_32_ptr(key, 0))
15-
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) (lhs == rhs)
14+
#define DN_SIMDHASH_KEY_HASHER(data, key) (MurmurHash3_32_ptr(key, 0))
15+
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) (lhs == rhs)
16+
// perfect cache alignment. 128-byte buckets for 64-bit pointers, 64-byte buckets for 32-bit pointers
1617
#if SIZEOF_VOID_P == 8
17-
#define DN_SIMDHASH_BUCKET_CAPACITY 11
18+
#define DN_SIMDHASH_BUCKET_CAPACITY 14
1819
#else
1920
#define DN_SIMDHASH_BUCKET_CAPACITY 12
2021
#endif

src/native/containers/dn-simdhash-ptrpair-ptr.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ dn_ptrpair_t_equals (dn_ptrpair_t lhs, dn_ptrpair_t rhs)
2626
#define DN_SIMDHASH_T dn_simdhash_ptrpair_ptr
2727
#define DN_SIMDHASH_KEY_T dn_ptrpair_t
2828
#define DN_SIMDHASH_VALUE_T void *
29-
#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_ptrpair_t_hash(key)
30-
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_ptrpair_t_equals(lhs, rhs)
29+
#define DN_SIMDHASH_KEY_HASHER(data, key) dn_ptrpair_t_hash(key)
30+
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) dn_ptrpair_t_equals(lhs, rhs)
3131
#if SIZEOF_VOID_P == 8
3232
// 192 bytes holds 12 16-byte blocks, so 11 keys and one suffix table
3333
#define DN_SIMDHASH_BUCKET_CAPACITY 11

src/native/containers/dn-simdhash-specialization.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@
2323
#error Expected DN_SIMDHASH_VALUE_T definition i.e. int
2424
#endif
2525

26+
// If specified, this data will be precalculated/prefetched at the start of scans
27+
// and passed to your KEY_EQUALS macro as its first parameter
28+
#ifndef DN_SIMDHASH_SCAN_DATA_T
29+
#define DN_SIMDHASH_SCAN_DATA_T uint8_t
30+
#define DN_SIMDHASH_GET_SCAN_DATA(data) 0
31+
#endif
32+
2633
// If specified, we pass instance data to the handlers by-value, otherwise we
2734
// pass the pointer to the hash itself by-value. This is enough to allow clang
2835
// to hoist the load of the instance data out of the key scan loop, though it
@@ -204,11 +211,13 @@ DN_SIMDHASH_SCAN_BUCKET_INTERNAL (DN_SIMDHASH_T_PTR hash, bucket_t *restrict buc
204211
#undef bucket_suffixes
205212

206213
if (DN_LIKELY(index < count)) {
214+
DN_SIMDHASH_SCAN_DATA_T scan_data = DN_SIMDHASH_GET_SCAN_DATA(DN_SIMDHASH_GET_DATA(hash));
215+
// HACK: Suppress unused variable warning for specializations that don't use scan_data
216+
(void)(scan_data);
217+
207218
DN_SIMDHASH_KEY_T *key = &bucket->keys[index];
208219
do {
209-
// FIXME: Could be profitable to manually hoist the data load outside of the loop,
210-
// if not out of SCAN_BUCKET_INTERNAL entirely. Clang appears to do LICM on it.
211-
if (DN_SIMDHASH_KEY_EQUALS(DN_SIMDHASH_GET_DATA(hash), needle, *key))
220+
if (DN_SIMDHASH_KEY_EQUALS(scan_data, needle, *key))
212221
return index;
213222
key++;
214223
index++;

src/native/containers/dn-simdhash-string-ptr.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ dn_simdhash_str_hash (dn_simdhash_str_key v1)
3535
#define DN_SIMDHASH_T dn_simdhash_string_ptr
3636
#define DN_SIMDHASH_KEY_T dn_simdhash_str_key
3737
#define DN_SIMDHASH_VALUE_T void *
38-
#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_simdhash_str_hash(key)
39-
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_simdhash_str_equal(lhs, rhs)
38+
#define DN_SIMDHASH_KEY_HASHER(data, key) dn_simdhash_str_hash(key)
39+
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) dn_simdhash_str_equal(lhs, rhs)
4040
#define DN_SIMDHASH_ACCESSOR_SUFFIX _raw
4141

4242
// perfect cache alignment. 32-bit ptrs: 8-byte keys. 64-bit: 16-byte keys.

src/native/containers/dn-simdhash-u32-ptr.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#define DN_SIMDHASH_T dn_simdhash_u32_ptr
88
#define DN_SIMDHASH_KEY_T uint32_t
99
#define DN_SIMDHASH_VALUE_T void *
10-
#define DN_SIMDHASH_KEY_HASHER(hash, key) murmur3_fmix32(key)
11-
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) (lhs == rhs)
10+
#define DN_SIMDHASH_KEY_HASHER(data, key) murmur3_fmix32(key)
11+
#define DN_SIMDHASH_KEY_EQUALS(data, lhs, rhs) (lhs == rhs)
1212

1313
#include "dn-simdhash-specialization.h"

src/native/containers/dn-simdhash-utils.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,35 @@ MurmurHash3_32_streaming (const uint8_t *key, uint32_t seed)
176176
return h1;
177177
}
178178

179+
static inline uint32_t
180+
MurmurHash3_32 (const uint8_t *key, uint32_t key_length, uint32_t seed)
181+
{
182+
uint32_t h1 = seed;
183+
184+
while (key_length >= sizeof(uint32_t)) {
185+
uint32_t block = *(uint32_t *)key;
186+
MURMUR3_HASH_BLOCK(block);
187+
key += sizeof(uint32_t);
188+
key_length -= sizeof(uint32_t);
189+
}
190+
191+
if (key_length) {
192+
uint32_t last_block = 0;
193+
// This is incorrect but easier to write
194+
while (key_length > 0) {
195+
last_block |= *key;
196+
last_block <<= 8;
197+
key++;
198+
}
199+
MURMUR3_HASH_BLOCK(last_block);
200+
}
201+
202+
// finalize. note this is not the same as 32_streaming
203+
h1 ^= key_length;
204+
h1 = murmur3_fmix32(h1);
205+
return h1;
206+
}
207+
179208
// end of reformulated murmur3-32
180209

181210
void

0 commit comments

Comments
 (0)