This repository was archived by the owner on Dec 22, 2021. It is now read-only.

Commit 777e5dc

Reimplementations of immutable HashSet and HashMap.
The reimplementations are based on Compressed Hash-Array Mapped Prefix-trees (CHAMP); see the paper "Optimizing Hash-Array Mapped Tries for Fast and Lean Immutable JVM Collections" by Steindorfer and Vinju (OOPSLA '15) for details and for descriptions of the low-level performance optimizations (a pre-print of the paper is available at https://michael.steindorfer.name/publications/oopsla15.pdf). This commit closes #192.

The new implementations (ChampHashSet and ChampHashMap) currently exist alongside the previous HashSet and HashMap. By default, immutable.Map and immutable.Set now pick up the CHAMP data structures. A JVM flag (-Dstrawman.collection.immutable.useBaseline=true) allows switching back to the previous HashSet and HashMap implementations for testing. Note that the flag and the previous implementations will be removed in the final version of collection-strawman; for the time being they remain in order to support comparing the trade-offs and performance characteristics of the current and the new data structures.

Preliminary performance numbers for the new CHAMP data structures were presented in issue #192. Overall, the CHAMP data structures have significantly lower memory footprints and significantly improve all iteration-based operations and equality checks. Lookups slow down, but insertion and deletion seem to benefit as well. The current state of the reimplementation does not yet optimize for hash collisions.

Note that the CHAMP design differs from the previous immutable hashed data structures in that it does not memoize the hash codes of the individual elements (which may change the performance of certain workloads). If necessary, CHAMP's design allows memoized hash codes to be added modularly, at the expense of some of the memory savings. Details are discussed in the paper mentioned above.
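To make the default switch concrete, here is a minimal sketch (the object name is hypothetical; Set, ChampHashSet, and the -D flag are from this commit, and the factory, +, and contains calls mirror those used in the benchmarks below):

import strawman.collection.immutable.{ ChampHashSet, Set }

object ChampDefaultSketch {
  def main(args: Array[String]): Unit = {
    // With this commit, immutable.Set picks up the CHAMP implementation by
    // default; starting the JVM with
    //   -Dstrawman.collection.immutable.useBaseline=true
    // switches back to the previous HashSet/HashMap implementations.
    val s: Set[Long] = Set(1L, 2L, 3L)

    // The CHAMP-based set can also be instantiated directly.
    val champ = ChampHashSet(1L, 2L, 3L)

    println(s.contains(2L))    // true
    println((champ + 4L).size) // 4: a persistent update returns a new set
  }
}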
1 parent 2eb7f23 commit 777e5dc

File tree

15 files changed: +2323 -20 lines

benchmarks/time/src/main/scala/strawman/collection/immutable/ChampHashSetBenchmark.scala

Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,158 @@
package strawman.collection.immutable

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

@BenchmarkMode(scala.Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(1)
@Warmup(iterations = 8)
@Measurement(iterations = 8)
@State(Scope.Benchmark)
class ChampHashSetBenchmark {
  @Param(scala.Array("0", "1", "2", "3", "4", "7", "8", "15", "16", "17", "39", "282", "4096", "131070", "7312102"))
  var size: Int = _

  var xs: ChampHashSet[Long] = _
  var zs: ChampHashSet[Long] = _
  var zipped: ChampHashSet[(Long, Long)] = _
  var randomIndices: scala.Array[Int] = _
  def fresh(n: Int) = ChampHashSet((1 to n).map(_.toLong): _*)

  @Setup(Level.Trial)
  def initTrial(): Unit = {
    xs = fresh(size)
    zs = fresh((size / 1000) max 2).map(-_)
    zipped = xs.map(x => (x, x))
    if (size > 0) {
      randomIndices = scala.Array.fill(1000)(scala.util.Random.nextInt(size))
    }
  }

  @Benchmark
  def create(bh: Blackhole): Unit = bh.consume(fresh(size))

  @Benchmark
  @OperationsPerInvocation(1000)
  def expand_incl(bh: Blackhole): Unit = {
    var ys = xs
    var i = 0L
    while (i < 1000) {
      ys += -i
      i += 1
    }
    bh.consume(ys)
  }

  @Benchmark
  def expand_concat(bh: Blackhole): Unit = bh.consume(xs ++ zs)

  @Benchmark
  def traverse_foreach(bh: Blackhole): Unit = xs.foreach(x => bh.consume(x))

  @Benchmark
  def traverse_headTail(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.head)
      ys = ys.tail
    }
  }

  @Benchmark
  def traverse_initLast(bh: Blackhole): Unit = {
    var ys = xs
    while (ys.nonEmpty) {
      bh.consume(ys.last)
      ys = ys.init
    }
  }

  @Benchmark
  def traverse_iterator(bh: Blackhole): Unit = {
    val it = xs.iterator()
    while (it.hasNext) {
      bh.consume(it.next())
    }
  }

  @Benchmark
  def traverse_foldLeft(bh: Blackhole): Unit = bh.consume(xs.foldLeft(0) {
    case (acc, n) =>
      bh.consume(n)
      acc + 1
  })

  @Benchmark
  def traverse_foldRight(bh: Blackhole): Unit = bh.consume(xs.foldRight(0) {
    case (n, acc) =>
      bh.consume(n)
      acc - 1
  })
  @Benchmark
  def access_tail(bh: Blackhole): Unit = bh.consume(xs.tail)

  @Benchmark
  def access_init(bh: Blackhole): Unit = bh.consume(xs.init)

  @Benchmark
  @OperationsPerInvocation(100)
  def access_slice(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      bh.consume(xs.slice(size - size / (i + 1), size))
      i += 1
    }
  }

  @Benchmark
  @OperationsPerInvocation(1000)
  def access_contains(bh: Blackhole): Unit = {
    var i = 0
    while (i < 1000) {
      bh.consume(xs.contains(i))
      i += 1
    }
  }

  @Benchmark
  def transform_map(bh: Blackhole): Unit = bh.consume(xs.map(x => x + 1))

  @Benchmark
  @OperationsPerInvocation(100)
  def transform_span(bh: Blackhole): Unit = {
    var i = 0
    while (i < 100) {
      val (xs1, xs2) = xs.span(x => x < randomIndices(i))
      bh.consume(xs1)
      bh.consume(xs2)
      i += 1
    }
  }

  @Benchmark
  def transform_zip(bh: Blackhole): Unit = bh.consume(xs.zip(xs))

  @Benchmark
  def transform_zipMapTupled(bh: Blackhole): Unit = {
    val f = (a: Long, b: Long) => (a, b)
    bh.consume(xs.zip(xs).map(f.tupled))
  }

  @Benchmark
  def transform_zipWithIndex(bh: Blackhole): Unit = bh.consume(xs.zipWithIndex)

  @Benchmark
  def transform_lazyZip(bh: Blackhole): Unit = bh.consume(xs.lazyZip(xs).map((_, _)))

  @Benchmark
  def transform_unzip(bh: Blackhole): Unit = bh.consume(zipped.unzip)

  @Benchmark
  def transform_groupBy(bh: Blackhole): Unit = {
    val result = xs.groupBy(_ % 5)
    bh.consume(result)
  }
}
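For completeness, a benchmark class like this one is normally launched through the sbt-jmh plugin, which also generates the JMH infrastructure that a run needs. The sketch below shows the equivalent programmatic launch via JMH's Runner API; the object name is hypothetical, and restricting the size @Param to a single value is only for a quick local run:

import org.openjdk.jmh.runner.Runner
import org.openjdk.jmh.runner.options.OptionsBuilder

object RunChampHashSetBenchmark {
  def main(args: Array[String]): Unit = {
    val opts = new OptionsBuilder()
      // select the benchmark class by (simple) name
      .include(classOf[ChampHashSetBenchmark].getSimpleName)
      // override the size @Param with a single value
      .param("size", "4096")
      .build()
    new Runner(opts).run()
  }
}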

benchmarks/time/src/main/scala/strawman/collection/immutable/HashSetBenchmark.scala

Lines changed: 9 additions & 8 deletions
@@ -64,14 +64,15 @@ class HashSetBenchmark {
     }
   }

-  @Benchmark
-  def traverse_initLast(bh: Blackhole): Unit = {
-    var ys = xs
-    while (ys.nonEmpty) {
-      bh.consume(ys.last)
-      ys = ys.init
-    }
-  }
+  // // TODO: currently disabled, since it does not finish
+  // @Benchmark
+  // def traverse_initLast(bh: Blackhole): Unit = {
+  //   var ys = xs
+  //   while (ys.nonEmpty) {
+  //     bh.consume(ys.last)
+  //     ys = ys.init
+  //   }
+  // }

   @Benchmark
   def traverse_iterator(bh: Blackhole): Unit = {

benchmarks/time/src/main/scala/strawman/collection/immutable/ScalaHashSetBenchmark.scala

Lines changed: 9 additions & 8 deletions
@@ -64,14 +64,15 @@ class ScalaHashSetBenchmark {
     }
   }

-  @Benchmark
-  def traverse_initLast(bh: Blackhole): Unit = {
-    var ys = xs
-    while (ys.nonEmpty) {
-      bh.consume(ys.last)
-      ys = ys.init
-    }
-  }
+  // // TODO: currently disabled, since it does not finish
+  // @Benchmark
+  // def traverse_initLast(bh: Blackhole): Unit = {
+  //   var ys = xs
+  //   while (ys.nonEmpty) {
+  //     bh.consume(ys.last)
+  //     ys = ys.init
+  //   }
+  // }

   @Benchmark
   def traverse_iterator(bh: Blackhole): Unit = {

build.sbt

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@ val commonSettings = Seq(
     <developer><id>odersky</id><name>Martin Odersky</name></developer>
     <developer><id>julienrf</id><name>Julien Richard-Foy</name></developer>
     <developer><id>szeiger</id><name>Stefan Zeiger</name></developer>
+    <developer><id>msteindorfer</id><name>Michael J. Steindorfer</name></developer>
   </developers>,
   // For publishing snapshots
   credentials ++= (
