Skip to content

Commit 2c252ff

Browse files
author
Ariel Ben-Yehuda
authored
Rollup merge of rust-lang#40237 - arthurprs:hm-adapt2, r=alexcrichton
Reduce size overhead of adaptive hashmap. Exposes a boolean flag in RawTable and uses it instead of a bool field in HashMap. Taking a bit from capacity or length would make overflow handling tricky. Fixes: rust-lang#40042
2 parents 68a5a16 + 3273003 commit 2c252ff

File tree

2 files changed

+74
-24
lines changed

2 files changed

+74
-24
lines changed

src/libstd/collections/hash/map.rs

+8-16
Original file line numberDiff line numberDiff line change
@@ -396,8 +396,6 @@ pub struct HashMap<K, V, S = RandomState> {
396396
table: RawTable<K, V>,
397397

398398
resize_policy: DefaultResizePolicy,
399-
400-
long_probes: bool,
401399
}
402400

403401
/// Search for a pre-hashed key.
@@ -655,7 +653,6 @@ impl<K, V, S> HashMap<K, V, S>
655653
hash_builder: hash_builder,
656654
resize_policy: DefaultResizePolicy::new(),
657655
table: RawTable::new(0),
658-
long_probes: false,
659656
}
660657
}
661658

@@ -688,7 +685,6 @@ impl<K, V, S> HashMap<K, V, S>
688685
hash_builder: hash_builder,
689686
resize_policy: resize_policy,
690687
table: RawTable::new(raw_cap),
691-
long_probes: false,
692688
}
693689
}
694690

@@ -746,7 +742,7 @@ impl<K, V, S> HashMap<K, V, S>
746742
let min_cap = self.len().checked_add(additional).expect("reserve overflow");
747743
let raw_cap = self.resize_policy.raw_capacity(min_cap);
748744
self.resize(raw_cap);
749-
} else if self.long_probes && remaining <= self.len() {
745+
} else if self.table.tag() && remaining <= self.len() {
750746
// Probe sequence is too long and table is half full,
751747
// resize early to reduce probing length.
752748
let new_capacity = self.table.capacity() * 2;
@@ -763,7 +759,6 @@ impl<K, V, S> HashMap<K, V, S>
763759
assert!(self.table.size() <= new_raw_cap);
764760
assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0);
765761

766-
self.long_probes = false;
767762
let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
768763
let old_size = old_table.size();
769764

@@ -844,8 +839,7 @@ impl<K, V, S> HashMap<K, V, S>
844839
/// If the key already exists, the hashtable will be returned untouched
845840
/// and a reference to the existing element will be returned.
846841
fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option<V> {
847-
let entry = search_hashed(&mut self.table, hash, |key| *key == k)
848-
.into_entry(k, &mut self.long_probes);
842+
let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k);
849843
match entry {
850844
Some(Occupied(mut elem)) => Some(elem.insert(v)),
851845
Some(Vacant(elem)) => {
@@ -1002,7 +996,7 @@ impl<K, V, S> HashMap<K, V, S>
1002996
self.reserve(1);
1003997
let hash = self.make_hash(&key);
1004998
search_hashed(&mut self.table, hash, |q| q.eq(&key))
1005-
.into_entry(key, &mut self.long_probes).expect("unreachable")
999+
.into_entry(key).expect("unreachable")
10061000
}
10071001

10081002
/// Returns the number of elements in the map.
@@ -1456,7 +1450,7 @@ impl<K, V, M> InternalEntry<K, V, M> {
14561450

14571451
impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
14581452
#[inline]
1459-
fn into_entry(self, key: K, long_probes: &'a mut bool) -> Option<Entry<'a, K, V>> {
1453+
fn into_entry(self, key: K) -> Option<Entry<'a, K, V>> {
14601454
match self {
14611455
InternalEntry::Occupied { elem } => {
14621456
Some(Occupied(OccupiedEntry {
@@ -1469,7 +1463,6 @@ impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
14691463
hash: hash,
14701464
key: key,
14711465
elem: elem,
1472-
long_probes: long_probes,
14731466
}))
14741467
}
14751468
InternalEntry::TableIsEmpty => None,
@@ -1542,7 +1535,6 @@ pub struct VacantEntry<'a, K: 'a, V: 'a> {
15421535
hash: SafeHash,
15431536
key: K,
15441537
elem: VacantEntryState<K, V, &'a mut RawTable<K, V>>,
1545-
long_probes: &'a mut bool,
15461538
}
15471539

15481540
#[stable(feature= "debug_hash_map", since = "1.12.0")]
@@ -2117,15 +2109,15 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> {
21172109
#[stable(feature = "rust1", since = "1.0.0")]
21182110
pub fn insert(self, value: V) -> &'a mut V {
21192111
match self.elem {
2120-
NeqElem(bucket, disp) => {
2112+
NeqElem(mut bucket, disp) => {
21212113
if disp >= DISPLACEMENT_THRESHOLD {
2122-
*self.long_probes = true;
2114+
bucket.table_mut().set_tag(true);
21232115
}
21242116
robin_hood(bucket, disp, self.hash, self.key, value)
21252117
},
2126-
NoElem(bucket, disp) => {
2118+
NoElem(mut bucket, disp) => {
21272119
if disp >= DISPLACEMENT_THRESHOLD {
2128-
*self.long_probes = true;
2120+
bucket.table_mut().set_tag(true);
21292121
}
21302122
bucket.put(self.hash, self.key, value).into_mut_refs().1
21312123
},

src/libstd/collections/hash/table.rs

+66-8
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,42 @@ type HashUint = usize;
3434

3535
const EMPTY_BUCKET: HashUint = 0;
3636

37+
/// Special `Unique<HashUint>` that uses the lower bit of the pointer
38+
/// to expose a boolean tag.
39+
/// Note: when the pointer is initialized to EMPTY `.ptr()` will return
40+
/// null and the tag functions shouldn't be used.
41+
struct TaggedHashUintPtr(Unique<HashUint>);
42+
43+
impl TaggedHashUintPtr {
44+
#[inline]
45+
unsafe fn new(ptr: *mut HashUint) -> Self {
46+
debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize);
47+
TaggedHashUintPtr(Unique::new(ptr))
48+
}
49+
50+
#[inline]
51+
fn set_tag(&mut self, value: bool) {
52+
let usize_ptr = &*self.0 as *const *mut HashUint as *mut usize;
53+
unsafe {
54+
if value {
55+
*usize_ptr |= 1;
56+
} else {
57+
*usize_ptr &= !1;
58+
}
59+
}
60+
}
61+
62+
#[inline]
63+
fn tag(&self) -> bool {
64+
(*self.0 as usize) & 1 == 1
65+
}
66+
67+
#[inline]
68+
fn ptr(&self) -> *mut HashUint {
69+
(*self.0 as usize & !1) as *mut HashUint
70+
}
71+
}
72+
3773
/// The raw hashtable, providing safe-ish access to the unzipped and highly
3874
/// optimized arrays of hashes, and key-value pairs.
3975
///
@@ -72,10 +108,14 @@ const EMPTY_BUCKET: HashUint = 0;
72108
/// around just the "table" part of the hashtable. It enforces some
73109
/// invariants at the type level and employs some performance trickery,
74110
/// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
111+
///
112+
/// The hashtable also exposes a special boolean tag. The tag defaults to false
113+
/// when the RawTable is created and is accessible with the `tag` and `set_tag`
114+
/// functions.
75115
pub struct RawTable<K, V> {
76116
capacity: usize,
77117
size: usize,
78-
hashes: Unique<HashUint>,
118+
hashes: TaggedHashUintPtr,
79119

80120
// Because K/V do not appear directly in any of the types in the struct,
81121
// inform rustc that in fact instances of K and V are reachable from here.
@@ -208,6 +248,10 @@ impl<K, V, M> FullBucket<K, V, M> {
208248
pub fn table(&self) -> &M {
209249
&self.table
210250
}
251+
/// Borrow a mutable reference to the table.
252+
pub fn table_mut(&mut self) -> &mut M {
253+
&mut self.table
254+
}
211255
/// Move out the reference to the table.
212256
pub fn into_table(self) -> M {
213257
self.table
@@ -227,6 +271,10 @@ impl<K, V, M> EmptyBucket<K, V, M> {
227271
pub fn table(&self) -> &M {
228272
&self.table
229273
}
274+
/// Borrow a mutable reference to the table.
275+
pub fn table_mut(&mut self) -> &mut M {
276+
&mut self.table
277+
}
230278
}
231279

232280
impl<K, V, M> Bucket<K, V, M> {
@@ -687,7 +735,7 @@ impl<K, V> RawTable<K, V> {
687735
return RawTable {
688736
size: 0,
689737
capacity: 0,
690-
hashes: Unique::new(EMPTY as *mut HashUint),
738+
hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint),
691739
marker: marker::PhantomData,
692740
};
693741
}
@@ -728,7 +776,7 @@ impl<K, V> RawTable<K, V> {
728776
RawTable {
729777
capacity: capacity,
730778
size: 0,
731-
hashes: Unique::new(hashes),
779+
hashes: TaggedHashUintPtr::new(hashes),
732780
marker: marker::PhantomData,
733781
}
734782
}
@@ -737,13 +785,13 @@ impl<K, V> RawTable<K, V> {
737785
let hashes_size = self.capacity * size_of::<HashUint>();
738786
let pairs_size = self.capacity * size_of::<(K, V)>();
739787

740-
let buffer = *self.hashes as *mut u8;
788+
let buffer = self.hashes.ptr() as *mut u8;
741789
let (pairs_offset, _, oflo) =
742790
calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>());
743791
debug_assert!(!oflo, "capacity overflow");
744792
unsafe {
745793
RawBucket {
746-
hash: *self.hashes,
794+
hash: self.hashes.ptr(),
747795
pair: buffer.offset(pairs_offset as isize) as *const _,
748796
_marker: marker::PhantomData,
749797
}
@@ -755,7 +803,7 @@ impl<K, V> RawTable<K, V> {
755803
pub fn new(capacity: usize) -> RawTable<K, V> {
756804
unsafe {
757805
let ret = RawTable::new_uninitialized(capacity);
758-
ptr::write_bytes(*ret.hashes, 0, capacity);
806+
ptr::write_bytes(ret.hashes.ptr(), 0, capacity);
759807
ret
760808
}
761809
}
@@ -774,7 +822,7 @@ impl<K, V> RawTable<K, V> {
774822
fn raw_buckets(&self) -> RawBuckets<K, V> {
775823
RawBuckets {
776824
raw: self.first_bucket_raw(),
777-
hashes_end: unsafe { self.hashes.offset(self.capacity as isize) },
825+
hashes_end: unsafe { self.hashes.ptr().offset(self.capacity as isize) },
778826
marker: marker::PhantomData,
779827
}
780828
}
@@ -832,6 +880,16 @@ impl<K, V> RawTable<K, V> {
832880
marker: marker::PhantomData,
833881
}
834882
}
883+
884+
/// Set the table tag
885+
pub fn set_tag(&mut self, value: bool) {
886+
self.hashes.set_tag(value)
887+
}
888+
889+
/// Get the table tag
890+
pub fn tag(&self) -> bool {
891+
self.hashes.tag()
892+
}
835893
}
836894

837895
/// A raw iterator. The basis for some other iterators in this module. Although
@@ -1156,7 +1214,7 @@ unsafe impl<#[may_dangle] K, #[may_dangle] V> Drop for RawTable<K, V> {
11561214
debug_assert!(!oflo, "should be impossible");
11571215

11581216
unsafe {
1159-
deallocate(*self.hashes as *mut u8, size, align);
1217+
deallocate(self.hashes.ptr() as *mut u8, size, align);
11601218
// Remember how everything was allocated out of one buffer
11611219
// during initialization? We only need one call to free here.
11621220
}

0 commit comments

Comments
 (0)