Skip to content

[benchmark] Fix benchmarks for Set operations #18928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 24, 2018
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
283 changes: 124 additions & 159 deletions benchmark/single-source/SetTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,107 +12,116 @@

import TestsUtils

public let SetTests = [
BenchmarkInfo(name: "SetExclusiveOr", runFunction: run_SetExclusiveOr, tags: [.validation, .api, .Set]),
BenchmarkInfo(name: "SetExclusiveOr_OfObjects", runFunction: run_SetExclusiveOr_OfObjects, tags: [.validation, .api, .Set]),
BenchmarkInfo(name: "SetIntersect", runFunction: run_SetIntersect, tags: [.validation, .api, .Set]),
BenchmarkInfo(name: "SetIntersect_OfObjects", runFunction: run_SetIntersect_OfObjects, tags: [.validation, .api, .Set]),
BenchmarkInfo(name: "SetIsSubsetOf", runFunction: run_SetIsSubsetOf, tags: [.validation, .api, .Set]),
BenchmarkInfo(name: "SetIsSubsetOf_OfObjects", runFunction: run_SetIsSubsetOf_OfObjects, tags: [.validation, .api, .Set]),
BenchmarkInfo(name: "SetUnion", runFunction: run_SetUnion, tags: [.validation, .api, .Set]),
BenchmarkInfo(name: "SetUnion_OfObjects", runFunction: run_SetUnion_OfObjects, tags: [.validation, .api, .Set]),
]

@inline(never)
public func run_SetIsSubsetOf(_ N: Int) {
let size = 200
let size = 400
let overlap = 100

SRand()
let setAB = Set(0 ..< size)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was drawing number lines like the dummy that I am… a little bit of spelling out would help me grok the correctness of this much sooner. What do you think?

let setAB = Set(0 ..< size)                            // 0...399
let setCD = Set(size ..< 2 * size)                     // 400...799
let setBC = Set(size - overlap ..< 2 * size - overlap) // 300...699
let setB = Set(size - overlap ..< size)                // 300...399

let setCD = Set(size ..< 2 * size)
let setBC = Set(size - overlap ..< 2 * size - overlap)
let setB = Set(size - overlap ..< size)

var set = Set<Int>(minimumCapacity: size)
var otherSet = Set<Int>(minimumCapacity: size)
let setOAB = Set(setAB.map(Box.init))
let setOCD = Set(setCD.map(Box.init))
let setOBC = Set(setBC.map(Box.init))
let setOB = Set(setB.map(Box.init))

for _ in 0 ..< size {
set.insert(Int(truncatingIfNeeded: Random()))
otherSet.insert(Int(truncatingIfNeeded: Random()))
}

var isSubset = false
for _ in 0 ..< N * 5000 {
isSubset = set.isSubset(of: otherSet)
if isSubset {
break
}
}
let countAC = 2 * (size - overlap)
let countABC = 2 * size - overlap
let countABCD = 2 * size
let countB = overlap

CheckResults(!isSubset)
}
public let SetTests = [
BenchmarkInfo(
name: "SetExclusiveOr2",
runFunction: { n in run_SetExclusiveOr(setAB, setBC, countAC, 10 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setAB, setBC]) }),
BenchmarkInfo(
name: "SetExclusiveOr2_OfObjects",
runFunction: { n in run_SetExclusiveOr_OfObjects(setOAB, setOBC, countAC, 10 * n) },
Copy link
Contributor

@palimondo palimondo Aug 24, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm skeptical that the same multiplier — 10 * n — for objects as for Ints wouldn't overshoot the healthy runtime. Do you aim for under 1 ms (1000 μs) runtimes? I see these are much lower than the original, so it might just work out… Do you want to keep the multipliers the same to show the cost of going from Ints to Boxes? That would be great if their difference isn't bigger than 100x… 🤨
Also, conventionally the variable has been named with a capital N; IMO it would make sense not to break with tradition here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The difference between Int and Box<Int> is relatively small; it's merely an extra indirection. It is important to remain able to keep track of the difference, though.

Capitalized names for function parameters are a silly tradition; I'm happy to be the barbarian who breaks it. ;-)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Oh, beyond the indirection, Box also hashes a bit slower than Int since it's not forwarding the one-shot hashing implementation.)

tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOAB, setOBC]) }),

BenchmarkInfo(
name: "SetIntersect2",
runFunction: { n in run_SetIntersect(setAB, setBC, countB, 10 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setAB, setBC]) }),
BenchmarkInfo(
name: "SetIntersect2_OfObjects",
runFunction: { n in run_SetIntersect_OfObjects(setOAB, setOBC, countB, 10 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOAB, setOBC]) }),

BenchmarkInfo(
name: "SetIsSubsetOf2",
runFunction: { n in run_SetIsSubsetOf(setB, setAB, true, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setB, setAB]) }),
BenchmarkInfo(
name: "SetIsSubsetOf2_OfObjects",
runFunction: { n in run_SetIsSubsetOf_OfObjects(setOB, setOAB, true, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOB, setOAB]) }),

BenchmarkInfo(
name: "SetUnion2",
runFunction: { n in run_SetUnion(setAB, setBC, countABC, 10 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setAB, setBC]) }),
BenchmarkInfo(
name: "SetUnion2_OfObjects",
runFunction: { n in run_SetUnion_OfObjects(setOAB, setOBC, countABC, 10 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOAB, setOBC]) }),
]

@inline(never)
func sink(_ s: inout Set<Int>) {
public func run_SetIsSubsetOf(
_ a: Set<Int>,
_ b: Set<Int>,
_ r: Bool,
_ n: Int) {
for _ in 0 ..< n {
let isSubset = a.isSubset(of: identity(b))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One more thing… could you document what the purpose of wrapping b in identity is here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be a relatively common idiom in these benchmarks -- it's there to prevent the compiler from moving things out of the loop. a and b are constant through all iterations, and in theory, a sufficiently smart compiler could optimize some/all of it away. Adding an opaque function call with unknown effects (hopefully) defeats these optimizations.

(I don't think any optimizations would apply in these cases, but it's better to be safe than sorry.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That explanation makes perfect sense. How about putting it in the source comment? That way it’s easier to point people unfamiliar with the idiom to this file as an example of best practice (link from docs, when we get to it).

CheckResults(isSubset == r)
}
}

@inline(never)
public func run_SetExclusiveOr(_ N: Int) {
let size = 400

SRand()

var set = Set<Int>(minimumCapacity: size)
var otherSet = Set<Int>(minimumCapacity: size)

for _ in 0 ..< size {
set.insert(Int(truncatingIfNeeded: Random()))
otherSet.insert(Int(truncatingIfNeeded: Random()))
}

var xor = Set<Int>()
for _ in 0 ..< N * 100 {
xor = set.symmetricDifference(otherSet)
public func run_SetExclusiveOr(
_ a: Set<Int>,
_ b: Set<Int>,
_ r: Int,
_ n: Int) {
for _ in 0 ..< n {
let diff = a.symmetricDifference(identity(b))
CheckResults(diff.count == r)
}
sink(&xor)
}

@inline(never)
public func run_SetUnion(_ N: Int) {
let size = 400

SRand()

var set = Set<Int>(minimumCapacity: size)
var otherSet = Set<Int>(minimumCapacity: size)

for _ in 0 ..< size {
set.insert(Int(truncatingIfNeeded: Random()))
otherSet.insert(Int(truncatingIfNeeded: Random()))
public func run_SetUnion(
_ a: Set<Int>,
_ b: Set<Int>,
_ r: Int,
_ n: Int) {
for _ in 0 ..< n {
let or = a.union(identity(b))
CheckResults(or.count == r)
}

var or = Set<Int>()
for _ in 0 ..< N * 100 {
or = set.union(otherSet)
}
sink(&or)
}

@inline(never)
public func run_SetIntersect(_ N: Int) {
let size = 400

SRand()

var set = Set<Int>(minimumCapacity: size)
var otherSet = Set<Int>(minimumCapacity: size)

for _ in 0 ..< size {
set.insert(Int(truncatingIfNeeded: Random()))
otherSet.insert(Int(truncatingIfNeeded: Random()))
}

var and = Set<Int>()
for _ in 0 ..< N * 100 {
and = set.intersection(otherSet)
public func run_SetIntersect(
_ a: Set<Int>,
_ b: Set<Int>,
_ r: Int,
_ n: Int) {
for _ in 0 ..< n {
let and = a.intersection(identity(b))
CheckResults(and.count == r)
}
sink(&and)
}

class Box<T : Hashable> : Hashable {
Expand All @@ -132,93 +141,49 @@ class Box<T : Hashable> : Hashable {
}

@inline(never)
public func run_SetIsSubsetOf_OfObjects(_ N: Int) {
let size = 200

SRand()

var set = Set<Box<Int>>(minimumCapacity: size)
var otherSet = Set<Box<Int>>(minimumCapacity: size)

for _ in 0 ..< size {
set.insert(Box(Int(truncatingIfNeeded: Random())))
otherSet.insert(Box(Int(truncatingIfNeeded: Random())))
func run_SetIsSubsetOf_OfObjects(
_ a: Set<Box<Int>>,
_ b: Set<Box<Int>>,
_ r: Bool,
_ n: Int) {
for _ in 0 ..< n {
let isSubset = a.isSubset(of: identity(b))
CheckResults(isSubset == r)
}

var isSubset = false
for _ in 0 ..< N * 5000 {
isSubset = set.isSubset(of: otherSet)
if isSubset {
break
}
}

CheckResults(!isSubset)
}

@inline(never)
func sink(_ s: inout Set<Box<Int>>) {
}

@inline(never)
public func run_SetExclusiveOr_OfObjects(_ N: Int) {
let size = 400

SRand()

var set = Set<Box<Int>>(minimumCapacity: size)
var otherSet = Set<Box<Int>>(minimumCapacity: size)

for _ in 0 ..< size {
set.insert(Box(Int(truncatingIfNeeded: Random())))
otherSet.insert(Box(Int(truncatingIfNeeded: Random())))
func run_SetExclusiveOr_OfObjects(
_ a: Set<Box<Int>>,
_ b: Set<Box<Int>>,
_ r: Int,
_ n: Int) {
for _ in 0 ..< n {
let diff = a.symmetricDifference(identity(b))
CheckResults(diff.count == r)
}

var xor = Set<Box<Int>>()
for _ in 0 ..< N * 100 {
xor = set.symmetricDifference(otherSet)
}
sink(&xor)
}

@inline(never)
public func run_SetUnion_OfObjects(_ N: Int) {
let size = 400

SRand()

var set = Set<Box<Int>>(minimumCapacity: size)
var otherSet = Set<Box<Int>>(minimumCapacity: size)

for _ in 0 ..< size {
set.insert(Box(Int(truncatingIfNeeded: Random())))
otherSet.insert(Box(Int(truncatingIfNeeded: Random())))
}

var or = Set<Box<Int>>()
for _ in 0 ..< N * 100 {
or = set.union(otherSet)
func run_SetUnion_OfObjects(
_ a: Set<Box<Int>>,
_ b: Set<Box<Int>>,
_ r: Int,
_ n: Int) {
for _ in 0 ..< n {
let or = a.union(identity(b))
CheckResults(or.count == r)
}
sink(&or)
}

@inline(never)
public func run_SetIntersect_OfObjects(_ N: Int) {
let size = 400

SRand()

var set = Set<Box<Int>>(minimumCapacity: size)
var otherSet = Set<Box<Int>>(minimumCapacity: size)

for _ in 0 ..< size {
set.insert(Box(Int(truncatingIfNeeded: Random())))
otherSet.insert(Box(Int(truncatingIfNeeded: Random())))
}

var and = Set<Box<Int>>()
for _ in 0 ..< N * 100 {
and = set.intersection(otherSet)
// Benchmark body for `Set.intersection(_:)` on sets of boxed integers.
//
// Parameters:
//   a: the set on which `intersection` is called.
//   b: the set passed as the argument to `intersection`.
//   r: the element count the intersection is expected to have.
//   n: the number of times the intersection is computed and checked.
func run_SetIntersect_OfObjects(
  _ a: Set<Box<Int>>,
  _ b: Set<Box<Int>>,
  _ r: Int,
  _ n: Int) {
  for _ in 0 ..< n {
    // `a` and `b` are constant across iterations. Passing `b` through the
    // opaque `identity` call, as the other Set benchmarks in this file do,
    // is meant to keep the compiler from hoisting the intersection out of
    // the loop or optimizing it away.
    let and = a.intersection(identity(b))
    CheckResults(and.count == r)
  }
}