Skip to content

Commit 52fa084

Browse files
committed
Upgrade to Scala v2.13
Key challenges with the upgrade:

* The [`scala-io`](https://github.com/jesseeichar/scala-io) library is not available for Scala 2.13 (I had custom-built the Scala 2.12 version, but it was too much effort to get it to 2.13). `scala-io` was providing several useful features:
  * streaming reading of character data with line-splitting that *preserves line-breaks* (because I don't want the BFG to change the line-breaks in your files while doing `--replace-text`). Finding a replacement library to do this wasn't easy - so much code seems to be based on `java.io.BufferedReader`, which throws line-breaks away. `java.util.Scanner` can _maybe_ do it, but I couldn't get it to work! In the end, I implemented my own solution, a tiny library with a fairly decent set of tests: https://github.com/rtyley/line-break-preserving-line-splitting
  * resource management - mostly replaced by using Scala 2.13's [`Using`](https://www.scala-lang.org/api/current/scala/util/Using$.html).
  * File-path types and operations - mostly now performed by java.nio & Guava functions.
* Scala 2.13 has a [revised collections framework](https://docs.scala-lang.org/overviews/core/collections-migration-213.html), so:
  * BFG has its own `ConcurrentSet` which needs to be implemented in the new fashion
  * `Traversable` becomes `Iterable`, etc
1 parent 73eabaf commit 52fa084

33 files changed

+309
-212
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
language: scala
22
dist: xenial
33
scala:
4-
- 2.12.12
4+
- 2.13.4
55

66
jdk:
77
- openjdk8

bfg-benchmark/build.sbt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import Dependencies._
22

33
libraryDependencies ++= Seq(
44
madgagCompress,
5-
scalaIoFile,
65
textmatching,
76
scopt
87
)

bfg-library/build.sbt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
11
import Dependencies._
22

3-
libraryDependencies ++= guava :+ scalaIoFile :+ textmatching :+ scalaGit :+ jgit :+ slf4jSimple :+ scalaGitTest % "test"
3+
libraryDependencies ++= guava ++ Seq(
4+
parCollections,
5+
scalaCollectionPlus,
6+
textmatching,
7+
scalaGit,
8+
jgit,
9+
slf4jSimple,
10+
lineSplitting,
11+
scalaGitTest % Test,
12+
"org.apache.commons" % "commons-text" % "1.9" % Test
13+
)
414

bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentMultiMap.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
package com.madgag.collection.concurrent
2222

23+
import com.madgag.scala.collection.decorators._
2324

2425
class ConcurrentMultiMap[A, B] {
2526

@@ -34,5 +35,5 @@ class ConcurrentMultiMap[A, B] {
3435
this
3536
}
3637

37-
def toMap: Map[A, Set[B]] = m.toMap.mapValues(_.toSet)
38+
def toMap: Map[A, Set[B]] = m.toMap.mapV(_.toSet)
3839
}

bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentSet.scala

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,53 @@
2020

2121
package com.madgag.collection.concurrent
2222

23-
import scala.collection.mutable.{AbstractSet, SetOps}
import scala.collection.{IterableFactory, IterableFactoryDefaults, mutable}

/**
 * A mutable Set backed by a lock-free [[scala.collection.concurrent.TrieMap]],
 * implemented against the Scala 2.13 collections framework.
 *
 * Elements are the keys of the underlying map; the Boolean values carry no
 * meaning and are always `true`.
 */
class ConcurrentSet[A]()
  extends AbstractSet[A]
    with SetOps[A, ConcurrentSet, ConcurrentSet[A]]
    with IterableFactoryDefaults[A, ConcurrentSet]
{

  val m: collection.concurrent.Map[A, Boolean] = collection.concurrent.TrieMap.empty

  // Ensures transformations (map, filter, ...) rebuild a ConcurrentSet
  // rather than the default mutable.Set.
  override def iterableFactory: IterableFactory[ConcurrentSet] = ConcurrentSet

  override def clear(): Unit = m.clear()

  override def addOne(elem: A): ConcurrentSet.this.type = {
    m.put(elem, true)
    this
  }

  override def subtractOne(elem: A): ConcurrentSet.this.type = {
    m.remove(elem)
    this
  }

  override def contains(elem: A): Boolean = m.contains(elem)

  override def iterator: Iterator[A] = m.keysIterator

}

object ConcurrentSet extends IterableFactory[ConcurrentSet] {

  /**
   * Returns a fresh empty set on every call.
   *
   * NOTE: a mutable factory must NOT hand out a shared `EmptySet` singleton
   * (as immutable factories do) — callers mutate the result, so a shared
   * instance would leak elements between every user of `empty`.
   */
  def empty[A]: ConcurrentSet[A] = new ConcurrentSet[A]

  /**
   * Builds a NEW set containing the elements of `source`. Unlike an immutable
   * factory, this must copy even when `source` is already a ConcurrentSet:
   * returning the source instance would alias mutable state between caller
   * and callee.
   */
  def from[A](source: collection.IterableOnce[A]): ConcurrentSet[A] =
    source match {
      case _ if source.knownSize == 0 => empty[A]
      case _ => (newBuilder[A] ++= source).result()
    }

  /** A Builder that grows a fresh ConcurrentSet via `addOne`. */
  def newBuilder[A]: mutable.Builder[A, ConcurrentSet[A]] =
    new mutable.GrowableBuilder(empty[A])
}

bfg-library/src/main/scala/com/madgag/git/LFS.scala

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,34 +20,34 @@
2020

2121
package com.madgag.git
2222

23-
import java.nio.charset.Charset
24-
import java.security.{DigestOutputStream, MessageDigest}
25-
2623
import com.google.common.base.Splitter
2724
import com.madgag.git.bfg.model.FileName
2825
import org.apache.commons.codec.binary.Hex._
2926
import org.eclipse.jgit.lib.ObjectLoader
3027

31-
import scala.collection.JavaConverters._
32-
import scalax.file.Path
33-
import scalax.file.defaultfs.DefaultPath
28+
import java.nio.charset.Charset
29+
import java.nio.charset.StandardCharsets.UTF_8
30+
import java.nio.file.{Files, Path}
31+
import java.security.{DigestOutputStream, MessageDigest}
32+
import scala.jdk.CollectionConverters._
33+
import scala.util.Using
3434

3535
object LFS {
3636

37-
val ObjectsPath = Path("lfs" , "objects")
37+
val ObjectsPath: Seq[String] = Seq("lfs" , "objects")
3838

39-
val PointerCharset = Charset.forName("UTF-8")
39+
val PointerCharset: Charset = UTF_8
4040

4141
case class Pointer(shaHex: String, blobSize: Long) {
4242

43-
lazy val text = s"""|version https://git-lfs.github.com/spec/v1
44-
|oid sha256:$shaHex
45-
|size $blobSize
46-
|""".stripMargin
43+
lazy val text: String = s"""|version https://git-lfs.github.com/spec/v1
44+
|oid sha256:$shaHex
45+
|size $blobSize
46+
|""".stripMargin
4747

48-
lazy val bytes = text.getBytes(PointerCharset)
48+
lazy val bytes: Array[Byte] = text.getBytes(PointerCharset)
4949

50-
lazy val path = Path(shaHex.substring(0, 2), shaHex.substring(2, 4), shaHex)
50+
lazy val path: Seq[String] = Seq(shaHex.substring(0, 2), shaHex.substring(2, 4), shaHex)
5151
}
5252

5353
object Pointer {
@@ -65,12 +65,12 @@ object LFS {
6565

6666
val GitAttributesFileName = FileName(".gitattributes")
6767

68-
def pointerFor(loader: ObjectLoader, tmpFile: DefaultPath) = {
68+
def pointerFor(loader: ObjectLoader, tmpFile: Path) = {
6969
val digest = MessageDigest.getInstance("SHA-256")
7070

71-
for {
72-
outStream <- tmpFile.outputStream()
73-
} loader.copyTo(new DigestOutputStream(outStream, digest))
71+
Using(Files.newOutputStream(tmpFile)) { outStream =>
72+
loader.copyTo(new DigestOutputStream(outStream, digest))
73+
}
7474

7575
Pointer(encodeHexString(digest.digest()), loader.getSize)
7676
}

bfg-library/src/main/scala/com/madgag/git/bfg/GitUtil.scala

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ import org.eclipse.jgit.lib._
3030
import org.eclipse.jgit.revwalk.RevWalk
3131
import org.eclipse.jgit.storage.file.WindowCacheConfig
3232

33-
import scala.collection.convert.ImplicitConversionsToScala._
33+
import scala.jdk.CollectionConverters._
34+
import scala.jdk.StreamConverters._
3435
import scala.language.implicitConversions
3536

37+
3638
trait CleaningMapper[V] extends Cleaner[V] {
3739
def isDirty(v: V) = apply(v) != v
3840

@@ -51,7 +53,7 @@ object GitUtil {
5153

5254
val ProbablyNoNonFileObjectsOverSizeThreshold: Long = 1024 * 1024
5355

54-
def tweakStaticJGitConfig(massiveNonFileObjects: Option[Long]) {
56+
def tweakStaticJGitConfig(massiveNonFileObjects: Option[Long]): Unit = {
5557
val wcConfig: WindowCacheConfig = new WindowCacheConfig()
5658
wcConfig.setStreamFileThreshold(Ints.saturatedCast(massiveNonFileObjects.getOrElse(ProbablyNoNonFileObjectsOverSizeThreshold)))
5759
wcConfig.install()
@@ -62,22 +64,22 @@ object GitUtil {
6264
implicit val revWalk = new RevWalk(repo)
6365
implicit val objectReader = revWalk.getObjectReader
6466

65-
repo.getAllRefs.values.map(_.getObjectId).filter(_.open.getType == Constants.OBJ_COMMIT)
66-
.map(_.asRevCommit).exists(_.getFooterLines(FormerCommitFooter.Key).nonEmpty)
67+
repo.getAllRefs.values().stream().toScala(Seq).map(_.getObjectId).filter(_.open.getType == Constants.OBJ_COMMIT)
68+
.map(_.asRevCommit).exists(_.getFooterLines(FormerCommitFooter.Key).asScala.nonEmpty)
6769
}
6870

6971
implicit def cleaner2CleaningMapper[V](f: Cleaner[V]): CleaningMapper[V] = new CleaningMapper[V] {
7072
def apply(v: V) = f(v)
7173
}
7274

73-
def biggestBlobs(implicit objectDB: ObjectDirectory, progressMonitor: ProgressMonitor = NullProgressMonitor.INSTANCE): Stream[SizedObject] = {
75+
def biggestBlobs(implicit objectDB: ObjectDirectory, progressMonitor: ProgressMonitor = NullProgressMonitor.INSTANCE): LazyList[SizedObject] = {
7476
Timing.measureTask("Scanning packfile for large blobs", ProgressMonitor.UNKNOWN) {
7577
val reader = objectDB.newReader
7678
objectDB.packedObjects.map {
7779
objectId =>
7880
progressMonitor update 1
7981
SizedObject(objectId, reader.getObjectSize(objectId, OBJ_ANY))
80-
}.toSeq.sorted.reverse.toStream.filter { oid =>
82+
}.toSeq.sorted.reverse.to(LazyList).filter { oid =>
8183
oid.size > ProbablyNoNonFileObjectsOverSizeThreshold || reader.open(oid.objectId).getType == OBJ_BLOB
8284
}
8385
}

bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobCharsetDetector.scala

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,36 +20,38 @@
2020

2121
package com.madgag.git.bfg.cleaner
2222

23-
import com.google.common.io.ByteStreams
import com.google.common.io.ByteStreams.toByteArray
import com.madgag.git.bfg.model.TreeBlobEntry
import org.eclipse.jgit.diff.RawText
import org.eclipse.jgit.lib.ObjectLoader

import java.nio.ByteBuffer
import java.nio.charset.Charset
import java.nio.charset.CodingErrorAction._
import scala.util.{Try, Using}


trait BlobCharsetDetector {
  // should return None if this is a binary file that can not be converted to text
  def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset]
}


object QuickBlobCharsetDetector extends BlobCharsetDetector {

  // Candidate charsets tried in order; `distinct` because defaultCharset()
  // may coincide with UTF-8 or ISO-8859-1 on some platforms.
  val CharSets: Seq[Charset] =
    Seq(Charset.forName("UTF-8"), Charset.defaultCharset(), Charset.forName("ISO-8859-1")).distinct

  /**
   * Samples up to the first 8000 bytes of the blob and returns the first
   * candidate charset that decodes the sample without error, or None when
   * the sample looks binary (or the stream could not be read).
   */
  def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset] = {
    Using(ByteStreams.limit(objectLoader.openStream(), 8000))(toByteArray).toOption.filterNot(RawText.isBinary).flatMap {
      sampleBytes =>
        val b = ByteBuffer.wrap(sampleBytes)
        // Decode a duplicate per attempt: CharsetDecoder.decode advances the
        // buffer's position (even when it throws), so re-using `b` directly
        // would make every charset after the first see only the undecoded
        // tail of the sample rather than the whole sample.
        CharSets.find(cs => Try(decode(b.duplicate(), cs)).isSuccess)
    }
  }

  // Strict decode: REPORT (throw) on malformed input or unmappable characters,
  // instead of the default silent replacement.
  private def decode(b: ByteBuffer, charset: Charset): Unit = {
    charset.newDecoder.onMalformedInput(REPORT).onUnmappableCharacter(REPORT).decode(b)
  }
}

bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobTextModifier.scala

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,14 @@
2020

2121
package com.madgag.git.bfg.cleaner
2222

23-
import java.io.ByteArrayOutputStream
24-
2523
import com.madgag.git.ThreadLocalObjectDatabaseResources
2624
import com.madgag.git.bfg.model.TreeBlobEntry
25+
import com.madgag.linesplitting.LineBreakPreservingIterator
2726
import org.eclipse.jgit.lib.Constants.OBJ_BLOB
27+
import org.eclipse.jgit.lib.ObjectLoader
2828

29-
import scalax.io.Resource
29+
import java.io.{ByteArrayOutputStream, InputStreamReader}
30+
import java.nio.charset.Charset
3031

3132

3233
object BlobTextModifier {
@@ -50,17 +51,13 @@ trait BlobTextModifier extends TreeBlobModifier {
5051
def filterTextIn(e: TreeBlobEntry, lineCleaner: String => String): TreeBlobEntry = {
5152
def isDirty(line: String) = lineCleaner(line) != line
5253

54+
val loader = threadLocalObjectDBResources.reader().open(e.objectId)
5355
val opt = for {
54-
loader <- Some(threadLocalObjectDBResources.reader().open(e.objectId))
55-
if loader.getSize < sizeThreshold
56-
streamResource <- Some(Resource.fromInputStream(loader.openStream()))
57-
charset <- charsetDetector.charsetFor(e, streamResource)
58-
reader <- Some(streamResource.reader(charset))
59-
lines = reader.lines(includeTerminator = true)
60-
if lines.exists(isDirty)
56+
charset <- charsetDetector.charsetFor(e, loader)
57+
if loader.getSize < sizeThreshold && linesFor(loader, charset).exists(isDirty)
6158
} yield {
6259
val b = new ByteArrayOutputStream(loader.getSize.toInt)
63-
lines.view.map(lineCleaner).foreach(line => b.write(line.getBytes(charset)))
60+
linesFor(loader, charset).map(lineCleaner).foreach(line => b.write(line.getBytes(charset)))
6461
val oid = threadLocalObjectDBResources.inserter().insert(OBJ_BLOB, b.toByteArray)
6562
e.copy(objectId = oid)
6663
}
@@ -73,4 +70,8 @@ trait BlobTextModifier extends TreeBlobModifier {
7370
case None => entry.withoutName
7471
}
7572
}
73+
74+
private def linesFor(loader: ObjectLoader, charset: Charset): Iterator[String] = {
75+
new LineBreakPreservingIterator(new InputStreamReader(loader.openStream(), charset))
76+
}
7677
}

0 commit comments

Comments (0)