Skip to content

Commit d4c4171

Browse files
committed
[stdlib] Speed up Set.contains
- Simplify generated code; eliminate obvious inefficiencies.
- Revive the guaranteedNative trick; apart from the code size benefits, removing the bridged branch has a measurable impact, at least for contains.
Lookups in sets are still not quite as fast as on master for small sets (up to ~1k elements) when the element type has fast equality checks. I believe this is because the SIMD approach has worse latency than direct lookup, and the lookup chains aren’t long enough to take advantage of its higher throughput. (The latency difference only matters when the set is fully cached, which is why the slowdown only occurs for small sets.)
1 parent 3b9d453 commit d4c4171

File tree

2 files changed

+103
-49
lines changed

2 files changed

+103
-49
lines changed

stdlib/public/core/HashTable.swift

Lines changed: 78 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,15 @@ internal struct _HashTable {
2424
internal let buckets: UnsafeMutablePointer<Bucket>
2525

2626
@usableFromInline
27-
internal let bucketCount: Int
27+
internal let bucketMask: Int
2828

2929
@inlinable
3030
@inline(__always)
3131
internal init(buckets: UnsafeMutablePointer<Bucket>, bucketCount: Int) {
3232
self.buckets = buckets
33-
self.bucketCount = bucketCount
33+
// The bucket count is a positive power of two, so subtracting 1 will
34+
// never overflow and get us a nice mask.
35+
self.bucketMask = bucketCount &- 1
3436
}
3537

3638
@inlinable
@@ -41,11 +43,9 @@ internal struct _HashTable {
4143
}
4244

4345
@inlinable
44-
internal var bucketMask: Int {
46+
internal var bucketCount: Int {
4547
@inline(__always) get {
46-
// The bucket count is a positive power of two, so subtracting 1 will
47-
// never overflow and get us a nice mask.
48-
return bucketCount &- 1
48+
return bucketMask &+ 1
4949
}
5050
}
5151
}
@@ -67,7 +67,7 @@ extension _HashTable {
6767

6868
/// The inverse of the maximum hash table load factor.
6969
private static var maxLoadFactorInverse: Double {
70-
@inline(__always) get { return 100 / 90 }
70+
@inline(__always) get { return 100 / 75 }
7171
}
7272

7373
internal static func capacity(forScale scale: Int) -> Int {
@@ -107,14 +107,6 @@ extension _HashTable {
107107
@usableFromInline
108108
@_fixed_layout
109109
internal struct Entry {
110-
@inlinable
111-
internal static var occupiedFlag: UInt8 {
112-
@inline(__always) get { return 0x80 }
113-
}
114-
@inlinable
115-
internal static var payloadMask: UInt8 {
116-
@inline(__always) get { return 0x7F }
117-
}
118110
@inlinable
119111
internal static var unoccupied: Entry {
120112
@inline(__always) get { return Entry(_value: 0) }
@@ -132,16 +124,15 @@ extension _HashTable {
132124
@inlinable
133125
@inline(__always)
134126
internal init(payload: UInt8) {
135-
_sanityCheck(payload & ~Entry.payloadMask == 0)
136-
self.init(_value: Entry.occupiedFlag | payload)
127+
_sanityCheck(payload != 0)
128+
self.init(_value: payload)
137129
}
138130

139131
@inlinable
140132
@inline(__always)
141133
internal init(forHashValue hashValue: Int) {
142-
// Use the highest seven bits of the hash value.
143-
let payload = UInt(bitPattern: hashValue) &>> (Int.bitWidth &- 7)
144-
self.init(payload: UInt8(truncatingIfNeeded: payload))
134+
let payload = UInt(bitPattern: hashValue) &>> (Int.bitWidth &- 8)
135+
self.init(payload: Swift.max(1, UInt8(truncatingIfNeeded: payload)))
145136
}
146137

147138
@inlinable
@@ -152,12 +143,23 @@ extension _HashTable {
152143
@inlinable
153144
internal var payload: UInt8 {
154145
@inline(__always) get {
155-
return _value & _HashTable.Entry.payloadMask
146+
return _value
156147
}
157148
}
158149
}
159150
}
160151

152+
extension _HashTable.Entry {
153+
@inlinable
154+
var pattern: UInt64 {
155+
@inline(__always) get {
156+
// Fill a 64-bit integer with 8 copies of this entry.
157+
let p = UInt64(truncatingIfNeeded: _value)
158+
return p &* 0x01010101_01010101
159+
}
160+
}
161+
}
162+
161163
extension _HashTable.Entry: Equatable {
162164
@inlinable
163165
@inline(__always)
@@ -270,7 +272,7 @@ extension _HashTable.Bucket {
270272
@inline(__always)
271273
get {
272274
// Holes are always at the end, so it's enough to check the highest byte.
273-
return _value &>> (UInt64.bitWidth &- UInt8.bitWidth) != 0
275+
return _value.leadingZeroBitCount < UInt8.bitWidth
274276
}
275277
}
276278

@@ -295,7 +297,7 @@ extension _HashTable.Bucket {
295297
@inline(__always)
296298
get {
297299
// Holes are zero bytes at the most significant places.
298-
let shift = UInt64.bitWidth - (_value.leadingZeroBitCount & ~7)
300+
let shift = UInt64.bitWidth &- (_value.leadingZeroBitCount & ~7)
299301
return _HashTable.Slot(shift: shift)
300302
}
301303
}
@@ -363,14 +365,9 @@ extension _HashTable.Bucket {
363365

364366
/// Returns a sequence of Slots matching the given entry.
365367
@inlinable
366-
internal func slots(matching entry: _HashTable.Entry) -> SlotSet {
367-
// Fill a 64-bit integer with 8 copies of the entry we're looking for.
368-
var p = UInt64(entry._value)
369-
p |= p &<< 8
370-
p |= p &<< 16
371-
p |= p &<< 32
372-
// Xoring `p` to `self._value` turns matching bytes into zeroes.
373-
p ^= self._value
368+
@inline(__always)
369+
internal func _slots(matching pattern: UInt64) -> SlotSet {
370+
let p = self._value ^ pattern
374371
// The problem now reduces to finding zero bytes in `p`. For every 8-bit
375372
// integer `b`, `x = ((b & 0x7F) + 0x7F) | b` has bit 7 set iff `b !=
376373
// 0`. Further, `~(x | 0x7F)` leaves bit 7 set to one iff `b == 0` and
@@ -384,6 +381,13 @@ extension _HashTable.Bucket {
384381
// bits to the start of their corresponding bytes.
385382
return SlotSet(_shifts: y &>> 7)
386383
}
384+
385+
/// Returns a sequence of Slots matching the given entry.
386+
@inlinable
387+
@inline(__always)
388+
internal func slots(matching entry: _HashTable.Entry) -> SlotSet {
389+
return _slots(matching: entry.pattern)
390+
}
387391
}
388392

389393
extension _HashTable {
@@ -483,7 +487,6 @@ extension _HashTable: Collection {
483487
}
484488

485489
@inlinable
486-
@inline(__always)
487490
internal func formIndex(after index: inout Index) {
488491
index.offset += 1
489492
if index.bucket >= bucketCount { return }
@@ -560,7 +563,7 @@ extension _HashTable {
560563
@usableFromInline
561564
let _hashTable: _HashTable
562565
@usableFromInline
563-
let _entry: Entry
566+
let _pattern: UInt64
564567
@usableFromInline
565568
var _bucket: Int
566569
@usableFromInline
@@ -569,14 +572,15 @@ extension _HashTable {
569572
@inlinable
570573
internal init(hashTable: _HashTable, hashValue: Int) {
571574
self._hashTable = hashTable
572-
self._entry = Entry(forHashValue: hashValue)
573575
self._bucket = hashTable._idealBucket(forHashValue: hashValue)
574-
self._matches = hashTable.buckets[_bucket].slots(matching: _entry)
576+
let b = hashTable.buckets[_bucket]
577+
self._pattern = Entry(forHashValue: hashValue).pattern
578+
self._matches = b._slots(matching: _pattern)
575579
}
576580

577581
@inlinable
578582
internal mutating func next() -> (index: Index, found: Bool) {
579-
repeat {
583+
while true {
580584
if let slot = _matches.next() {
581585
return (Index(bucket: _bucket, slot: slot), true)
582586
}
@@ -586,8 +590,8 @@ extension _HashTable {
586590
return (Index(bucket: _bucket, slot: hole), false)
587591
}
588592
_bucket = _hashTable._succ(_bucket)
589-
_matches = _hashTable.buckets[_bucket].slots(matching: _entry)
590-
} while true
593+
_matches = _hashTable.buckets[_bucket]._slots(matching: _pattern)
594+
}
591595
}
592596
}
593597

@@ -598,7 +602,37 @@ extension _HashTable {
598602
}
599603

600604
extension _HashTable {
601-
@usableFromInline
605+
@inlinable
606+
@inline(__always)
607+
func contains<Element: Equatable>(
608+
hashValue: Int,
609+
element: Element,
610+
elements: UnsafePointer<Element>
611+
) -> Bool {
612+
var bucket = _idealBucket(forHashValue: hashValue)
613+
var b = self.buckets[bucket]
614+
let pattern = Entry(forHashValue: hashValue).pattern
615+
var matches = b._slots(matching: pattern)._shifts
616+
while true {
617+
if _fastPath(matches != 0) {
618+
let shift = matches.trailingZeroBitCount
619+
let index = Index(bucket: bucket, slotOffset: shift &>> 3)
620+
if elements[index.offset] == element { return true }
621+
matches &= matches &- 1
622+
} else if b.isFull {
623+
bucket = _succ(bucket)
624+
b = self.buckets[bucket]
625+
matches = b._slots(matching: pattern)._shifts
626+
} else {
627+
return false
628+
}
629+
}
630+
}
631+
}
632+
633+
extension _HashTable {
634+
@inlinable
635+
@inline(__always)
602636
@_effects(releasenone)
603637
internal func copyContents(of other: _HashTable) {
604638
_sanityCheck(bucketCount == other.bucketCount)
@@ -607,7 +641,8 @@ extension _HashTable {
607641

608642
/// Insert a new entry with the specified hash value into the table.
609643
/// The entry must not already exist in the table -- duplicates are ignored.
610-
@usableFromInline
644+
@inlinable
645+
@inline(__always)
611646
@_effects(releasenone)
612647
internal func insertNew(hashValue: Int) -> Index {
613648
var bucket = _idealBucket(forHashValue: hashValue)
@@ -621,14 +656,16 @@ extension _HashTable {
621656
/// Insert a new entry for an element with the specified hash value at
622657
/// `index`. The index must have been returned by `lookupFirst` or
623658
/// `lookupNext` for the same hash value, with `found == false`.
624-
@usableFromInline
659+
@inlinable
625660
@inline(__always)
626661
@_effects(releasenone)
627662
internal func insert(hashValue: Int, at index: Index) {
628663
_sanityCheck(!isOccupied(index))
629664
self[index] = Entry(forHashValue: hashValue)
630665
}
631666

667+
@inlinable
668+
@inline(__always)
632669
internal func removeAll() {
633670
buckets.assign(repeating: Bucket(0), count: bucketCount)
634671
}

stdlib/public/core/Set.swift

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,6 +1513,7 @@ internal class _SwiftRawSetStorage: _SwiftNativeNSSet {
15131513

15141514
/// The storage class for the singleton empty set.
15151515
/// The single instance of this class is created by the runtime.
1516+
@_fixed_layout
15161517
@usableFromInline
15171518
internal class _SwiftEmptySetStorage: _SwiftRawSetStorage {
15181519
override internal init(_doNotCallMe: ()) {
@@ -1803,7 +1804,7 @@ extension _NativeSet {
18031804
}
18041805
}
18051806

1806-
@inlinable
1807+
@usableFromInline @_transparent
18071808
internal var elements: UnsafeMutablePointer<Element> {
18081809
@inline(__always)
18091810
get {
@@ -1818,8 +1819,7 @@ extension _NativeSet {
18181819
}
18191820
}
18201821

1821-
@inlinable
1822-
@inline(__always)
1822+
@usableFromInline @_transparent
18231823
internal func uncheckedElement(at index: Index) -> Element {
18241824
_sanityCheck(hashTable.isOccupied(index))
18251825
return elements[index.offset]
@@ -1954,7 +1954,10 @@ extension _NativeSet: _SetBuffer {
19541954
// // Fast path that avoids computing the hash of the key.
19551955
// return false
19561956
// }
1957-
return find(member).found
1957+
return hashTable.contains(
1958+
hashValue: hashValue(for: member),
1959+
element: member,
1960+
elements: elements)
19581961
}
19591962

19601963
@inlinable
@@ -2963,9 +2966,17 @@ extension Set._Variant: _SetBuffer {
29632966
}
29642967
}
29652968

2969+
@usableFromInline @_transparent
2970+
internal var guaranteedNative: Bool {
2971+
return _canBeClass(Element.self) == 0
2972+
}
2973+
29662974
@inlinable
29672975
@inline(__always)
29682976
internal func contains(_ member: Element) -> Bool {
2977+
if guaranteedNative {
2978+
return asNative.contains(member)
2979+
}
29692980
switch self {
29702981
case .native:
29712982
return asNative.contains(member)
@@ -3766,20 +3777,26 @@ public typealias SetIterator<Element: Hashable> = Set<Element>.Iterator
37663777
extension Set {
37673778
// FIXME: Remove
37683779
public // @testable performance metrics
3769-
var _stats: (maxLookups: Int, averageLookups: Double, maxCollisions: Int)? {
3780+
var _stats: (maxLookups: Int, averageLookups: Double, maxCollisions: Int, averageCollisions: Double)? {
37703781
guard case .native(let native) = _variant else { return nil }
3771-
guard native.count > 0 else { return (0, 0, 0) }
3782+
guard native.count > 0 else { return (0, 0, 0, 0) }
37723783
defer { _fixLifetime(self) }
37733784
var maxLookups = 0
37743785
var sumLookups = 0
37753786
var maxCollisions = 0
3787+
var sumCollisions = 0
37763788
for i in native.indices {
37773789
let stats = native.stats(ofElementAt: i)
37783790
maxLookups = Swift.max(maxLookups, stats.displacement + 1)
37793791
sumLookups += stats.displacement + 1
37803792
maxCollisions = Swift.max(maxCollisions, stats.collisions)
3793+
sumCollisions += stats.collisions
37813794
}
3782-
return (maxLookups, Double(sumLookups) / Double(count), maxCollisions)
3795+
return (
3796+
maxLookups,
3797+
Double(sumLookups) / Double(count),
3798+
maxCollisions,
3799+
Double(sumCollisions) / Double(count))
37833800
}
37843801
}
37853802

@@ -3826,7 +3843,7 @@ extension _NativeSet {
38263843
let hashValue = self.hashValue(for: element)
38273844
let stats = self.stats(ofElementAt: i)
38283845
let h = String(UInt(bitPattern: hashValue), radix: 16)
3829-
let hl = String(hashTable[i].payload << 1, radix: 16)
3846+
let hl = String(hashTable[i].payload, radix: 16)
38303847
result += " <\(i.offset)> "
38313848
result += "delta: \(stats.displacement) (\(stats.collisions) coll) "
38323849
result += "hash: \(hl)/\(h): "

0 commit comments

Comments
 (0)