
Commit 566b143

Reimplementations of immutable HashSet and HashMap.
The reimplementations are based on Compressed Hash-Array Mapped Prefix-trees (CHAMP); see the paper "Optimizing Hash-Array Mapped Tries for Fast and Lean Immutable JVM Collections" by Steindorfer and Vinju (OOPSLA '15) for details and descriptions of the low-level performance optimizations (a pre-print is available at https://michael.steindorfer.name/publications/oopsla15.pdf). This commit closes #192.

The new implementations (i.e., ChampHashSet and ChampHashMap) currently exist next to the previous HashMap and HashSet. By default, immutable.Map and immutable.Set now pick up the CHAMP data structures. A JVM flag (-Dstrawman.collection.immutable.useBaseline=true) allows switching back to the previous HashSet and HashMap implementations for testing. Note: the flag and the previous HashSet and HashMap implementations will be removed in the final version of collection-strawman, but for the time being they remain to support comparing the trade-offs and performance characteristics of the current and the new data structures.

Preliminary performance numbers for the new CHAMP data structures were presented in issue #192. In summary, the CHAMP data structures have significantly lower memory footprints and significantly improve all iteration-based operations and equality checks, while basic operations such as lookup, insertion, and deletion may slow down. The current state of the reimplementation does not yet optimize for hash collisions.

Note that the CHAMP design and implementation differ from the previous immutable hashed data structures in that they do not memoize the hash codes of the individual elements, which may change the performance of certain workloads. If necessary, CHAMP's design allows memoized element hash codes to be added modularly (at the expense of some of the memory savings). Details are discussed in the paper mentioned above.
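For readers unfamiliar with CHAMP: it is a 32-ary trie in which every node keeps two bitmaps, one marking slots that hold payload and one marking slots that hold sub-nodes, so both kinds of entry can share a single compressed array. Below is a minimal sketch of the lookup path, for illustration only; the names and the layout (payload at the front of the array, sub-nodes stored in reverse from the back) follow the paper's description, not the actual collection-strawman sources.

object ChampSketch {
  // One CHAMP node: two bitmaps over 32 logical slots plus one compressed array.
  final class Node(
    val dataMap: Int,       // bit i set: slot i holds an element (payload)
    val nodeMap: Int,       // bit i set: slot i holds a sub-node
    val content: Array[Any] // payload first; sub-nodes reversed at the end
  )

  private val BitPartitionSize = 5 // 5 hash bits per trie level => 32 slots

  def contains(node: Node, elem: Any, hash: Int, shift: Int): Boolean = {
    val mask = (hash >>> shift) & 0x1f // select one of the 32 slots
    val bit  = 1 << mask
    if ((node.dataMap & bit) != 0) {
      // compressed index: count the set bits below our slot in dataMap
      val index = Integer.bitCount(node.dataMap & (bit - 1))
      node.content(index) == elem
    } else if ((node.nodeMap & bit) != 0) {
      // sub-nodes are stored from the end of the array, in reverse order
      val index = node.content.length - 1 - Integer.bitCount(node.nodeMap & (bit - 1))
      contains(node.content(index).asInstanceOf[Node], elem, hash, shift + BitPartitionSize)
    } else false
  }
}

Because element hash codes are not memoized, a lookup recomputes the element's hash once and consumes it five bits per level; the compressed array is what gives CHAMP its smaller memory footprint and cache-friendly iteration.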
1 parent 2eb7f23 commit 566b143
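The useBaseline flag mentioned above is an ordinary JVM system property. A minimal sketch of how such a switch can be read and acted on is shown below; the property name comes from the commit message, but the factory wiring is a hypothetical illustration, not the actual collection-strawman code.

import strawman.collection.immutable.{ChampHashSet, HashSet}

object ImplementationSwitch {
  // True when the JVM was started with
  // -Dstrawman.collection.immutable.useBaseline=true
  val useBaseline: Boolean =
    java.lang.Boolean.getBoolean("strawman.collection.immutable.useBaseline")

  // Hypothetical factory; the real default wiring inside immutable.Set may differ.
  def defaultSet[A](elems: A*): strawman.collection.immutable.Set[A] =
    if (useBaseline) HashSet(elems: _*) // previous implementation
    else ChampHashSet(elems: _*)        // new CHAMP-based implementation
}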

File tree

15 files changed: +2429 −20 lines
benchmarks/time/src/main/scala/strawman/collection/immutable/ChampHashSetBenchmark.scala

Lines changed: 167 additions & 0 deletions

@@ -0,0 +1,167 @@
package strawman.collection.immutable

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

@BenchmarkMode(scala.Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(1)
@Warmup(iterations = 8)
@Measurement(iterations = 8)
@State(Scope.Benchmark)
class ChampHashSetBenchmark {
  @Param(scala.Array("0", "1", "2", "3", "4", "7", "8", "15", "16", "17", "39", "282", "4096", "131070", "7312102"))
  var size: Int = _

  var xs: ChampHashSet[Long] = _
  var ys: ChampHashSet[Long] = _
  var zs: ChampHashSet[Long] = _
  var zipped: ChampHashSet[(Long, Long)] = _
  var randomIndices: scala.Array[Int] = _
  def fresh(n: Int) = ChampHashSet((1 to n).map(_.toLong): _*)

  @Setup(Level.Trial)
  def initTrial(): Unit = {
    xs = fresh(size)
    ys = fresh(size)
    zs = fresh((size / 1000) max 2).map(-_)
    zipped = xs.map(x => (x, x))
    if (size > 0) {
      randomIndices = scala.Array.fill(1000)(scala.util.Random.nextInt(size))
    }
  }

  @Benchmark
  def create(bh: Blackhole): Unit = bh.consume(fresh(size))

  @Benchmark
  @OperationsPerInvocation(1000)
  def expand_incl(bh: Blackhole): Unit = {
    var ys = xs
    var i = 0L
    while (i < 1000) {
      ys += -i
      i += 1
    }
    bh.consume(ys)
  }

  @Benchmark
  def expand_concat(bh: Blackhole): Unit = bh.consume(xs ++ zs)

  @Benchmark
  def traverse_foreach(bh: Blackhole): Unit = xs.foreach(x => bh.consume(x))

  @Benchmark
  def traverse_headTail(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.head)
      ys = ys.tail
    }
  }

  @Benchmark
  def traverse_initLast(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.last)
      ys = ys.init
    }
  }

  @Benchmark
  def traverse_iterator(bh: Blackhole): Unit = {
    val it = xs.iterator()
    while (it.hasNext) {
      bh.consume(it.next())
    }
  }

  @Benchmark
  def traverse_foldLeft(bh: Blackhole): Unit = bh.consume(xs.foldLeft(0) {
    case (acc, n) =>
      bh.consume(n)
      acc + 1
  })

  @Benchmark
  def traverse_foldRight(bh: Blackhole): Unit = bh.consume(xs.foldRight(0) {
    case (n, acc) =>
      bh.consume(n)
      acc - 1
  })

  @Benchmark
  def access_tail(bh: Blackhole): Unit = bh.consume(xs.tail)

  @Benchmark
  def access_init(bh: Blackhole): Unit = bh.consume(xs.init)

  @Benchmark
  @OperationsPerInvocation(100)
  def access_slice(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      bh.consume(xs.slice(size - size / (i + 1), size))
      i += 1
    }
  }

  @Benchmark
  @OperationsPerInvocation(1000)
  def access_contains(bh: Blackhole): Unit = {
    var i = 0
    while (i < 1000) {
      bh.consume(xs.contains(i))
      i += 1
    }
  }

  @Benchmark
  def transform_map(bh: Blackhole): Unit = bh.consume(xs.map(x => x + 1))

  @Benchmark
  @OperationsPerInvocation(100)
  def transform_span(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      val (xs1, xs2) = xs.span(x => x < randomIndices(i))
      bh.consume(xs1)
      bh.consume(xs2)
      i += 1
    }
  }

  @Benchmark
  def transform_zip(bh: Blackhole): Unit = bh.consume(xs.zip(xs))

  @Benchmark
  def transform_zipMapTupled(bh: Blackhole): Unit = {
    val f = (a: Long, b: Long) => (a, b)
    bh.consume(xs.zip(xs).map(f.tupled))
  }

  @Benchmark
  def transform_zipWithIndex(bh: Blackhole): Unit = bh.consume(xs.zipWithIndex)

  @Benchmark
  def transform_lazyZip(bh: Blackhole): Unit = bh.consume(xs.lazyZip(xs).map((_, _)))

  @Benchmark
  def transform_unzip(bh: Blackhole): Unit = bh.consume(zipped.unzip)

  @Benchmark
  def transform_groupBy(bh: Blackhole): Unit = {
    val result = xs.groupBy(_ % 5)
    bh.consume(result)
  }

  @Benchmark
  def traverse_subsetOf(bh: Blackhole): Unit = bh.consume(ys.subsetOf(xs))

  @Benchmark
  def traverse_equals(bh: Blackhole): Unit = bh.consume(xs == ys)
}
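As a quick sanity check outside the JMH harness, the operations this benchmark exercises can be tried directly against the new type. A small sketch, using only calls that appear in the benchmark above:

import strawman.collection.immutable.ChampHashSet

object ChampQuickCheck {
  def main(args: Array[String]): Unit = {
    val xs = ChampHashSet((1 to 1000).map(_.toLong): _*)
    val ys = ChampHashSet((1 to 1000).map(_.toLong): _*)

    assert(xs.contains(500L)) // point lookup
    assert(ys.subsetOf(xs))   // iteration-based; improved by CHAMP
    assert(xs == ys)          // equality check; also improved by CHAMP

    val zs = xs ++ ChampHashSet(-1L, -2L) // concat returns a new set
    assert(zs.size == 1002 && xs.size == 1000)
  }
}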

benchmarks/time/src/main/scala/strawman/collection/immutable/HashSetBenchmark.scala

Lines changed: 18 additions & 8 deletions
@@ -19,6 +19,7 @@ class HashSetBenchmark {
   var size: Int = _

   var xs: HashSet[Long] = _
+  var ys: HashSet[Long] = _
   var zs: HashSet[Long] = _
   var zipped: HashSet[(Long, Long)] = _
   var randomIndices: scala.Array[Int] = _
@@ -27,6 +28,7 @@ class HashSetBenchmark {
   @Setup(Level.Trial)
   def initTrial(): Unit = {
     xs = fresh(size)
+    ys = fresh(size)
     zs = fresh((size / 1000) max 2).map(-_)
     zipped = xs.map(x => (x, x))
     if (size > 0) {
@@ -64,14 +66,15 @@ class HashSetBenchmark {
     }
   }

-  @Benchmark
-  def traverse_initLast(bh: Blackhole): Unit = {
-    var ys = xs
-    while (ys.nonEmpty) {
-      bh.consume(ys.last)
-      ys = ys.init
-    }
-  }
+  // // TODO: currently disabled, since it does not finish
+  // @Benchmark
+  // def traverse_initLast(bh: Blackhole): Unit = {
+  //   var ys = xs
+  //   while (ys.nonEmpty) {
+  //     bh.consume(ys.last)
+  //     ys = ys.init
+  //   }
+  // }

   @Benchmark
   def traverse_iterator(bh: Blackhole): Unit = {
@@ -158,4 +161,11 @@ class HashSetBenchmark {
     val result = xs.groupBy(_ % 5)
     bh.consume(result)
   }
+
+  @Benchmark
+  def traverse_subsetOf(bh: Blackhole): Unit = bh.consume(ys.subsetOf(xs))
+
+  @Benchmark
+  def traverse_equals(bh: Blackhole): Unit = bh.consume(xs == ys)
+
 }

benchmarks/time/src/main/scala/strawman/collection/immutable/ScalaHashSetBenchmark.scala

Lines changed: 18 additions & 8 deletions
@@ -19,6 +19,7 @@ class ScalaHashSetBenchmark {
   var size: Int = _

   var xs: scala.collection.immutable.HashSet[Long] = _
+  var ys: scala.collection.immutable.HashSet[Long] = _
   var zs: scala.collection.immutable.HashSet[Long] = _
   var zipped: scala.collection.immutable.HashSet[(Long, Long)] = _
   var randomIndices: scala.Array[Int] = _
@@ -27,6 +28,7 @@ class ScalaHashSetBenchmark {
   @Setup(Level.Trial)
   def initTrial(): Unit = {
     xs = fresh(size)
+    ys = fresh(size)
     zs = fresh((size / 1000) max 2).map(-_)
     zipped = xs.map(x => (x, x))
     if (size > 0) {
@@ -64,14 +66,15 @@ class ScalaHashSetBenchmark {
     }
   }

-  @Benchmark
-  def traverse_initLast(bh: Blackhole): Unit = {
-    var ys = xs
-    while (ys.nonEmpty) {
-      bh.consume(ys.last)
-      ys = ys.init
-    }
-  }
+  // // TODO: currently disabled, since it does not finish
+  // @Benchmark
+  // def traverse_initLast(bh: Blackhole): Unit = {
+  //   var ys = xs
+  //   while (ys.nonEmpty) {
+  //     bh.consume(ys.last)
+  //     ys = ys.init
+  //   }
+  // }

   @Benchmark
   def traverse_iterator(bh: Blackhole): Unit = {
@@ -158,4 +161,11 @@ class ScalaHashSetBenchmark {
     val result = xs.groupBy(_ % 5)
     bh.consume(result)
   }
+
+  @Benchmark
+  def traverse_subsetOf(bh: Blackhole): Unit = bh.consume(ys.subsetOf(xs))
+
+  @Benchmark
+  def traverse_equals(bh: Blackhole): Unit = bh.consume(xs == ys)
+
 }

build.sbt

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@ val commonSettings = Seq(
       <developer><id>odersky</id><name>Martin Odersky</name></developer>
       <developer><id>julienrf</id><name>Julien Richard-Foy</name></developer>
       <developer><id>szeiger</id><name>Stefan Zeiger</name></developer>
+      <developer><id>msteindorfer</id><name>Michael J. Steindorfer</name></developer>
     </developers>,
   // For publishing snapshots
   credentials ++= (
