From b8b0f8758bd8425e5a0bcd5cc920f0e21a46c480 Mon Sep 17 00:00:00 2001 From: Tomas Janousek Date: Fri, 3 Jul 2015 23:10:50 +0200 Subject: [PATCH 1/3] Speed up line/column in OffsetPosition Building the index again and again for every OffsetPosition instance makes very little sense. So this makes it build it only once for a given source. I'm not sure if this is the best way to implement it and I'm afraid that a global synchronized map may be a performance bottleneck once the number of processors goes into the hundreds, but I know too little Scala/Java to do this properly. :-( Fixes http://stackoverflow.com/questions/14707127/accessing-position-information-in-a-scala-combinatorparser-kills-performance --- .../util/parsing/input/OffsetPosition.scala | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/main/scala/scala/util/parsing/input/OffsetPosition.scala b/src/main/scala/scala/util/parsing/input/OffsetPosition.scala index 23f79c74..08527375 100644 --- a/src/main/scala/scala/util/parsing/input/OffsetPosition.scala +++ b/src/main/scala/scala/util/parsing/input/OffsetPosition.scala @@ -23,6 +23,16 @@ case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends P /** An index that contains all line starts, including first line, and eof. */ private lazy val index: Array[Int] = { + Option(OffsetPosition.indexCache.get(source)) match { + case Some(index) => index + case None => + val index = genIndex + OffsetPosition.indexCache.put(source, index) + index + } + } + + private def genIndex: Array[Int] = { val lineStarts = new ArrayBuffer[Int] lineStarts += 0 for (i <- 0 until source.length) @@ -71,3 +81,12 @@ case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends P this.line == that.line && this.column < that.column } } + +/** An object holding the index cache. 
+ * + * @author Tomáš Janoušek + */ +object OffsetPosition { + private lazy val indexCache = java.util.Collections.synchronizedMap( + new java.util.WeakHashMap[java.lang.CharSequence, Array[Int]]) +} From b6ee1f6a115cbe63d7a1fb49ba194a66813aa3be Mon Sep 17 00:00:00 2001 From: Tomas Janousek Date: Thu, 6 Aug 2015 15:41:26 +0200 Subject: [PATCH 2/3] Use a thread local WeakHashMap instead of synchronized This should scale much better. --- .../scala/util/parsing/input/OffsetPosition.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/scala/scala/util/parsing/input/OffsetPosition.scala b/src/main/scala/scala/util/parsing/input/OffsetPosition.scala index 08527375..1449dbd4 100644 --- a/src/main/scala/scala/util/parsing/input/OffsetPosition.scala +++ b/src/main/scala/scala/util/parsing/input/OffsetPosition.scala @@ -10,6 +10,8 @@ package scala package util.parsing.input import scala.collection.mutable.ArrayBuffer +import java.lang.{CharSequence, ThreadLocal} +import java.util.WeakHashMap /** `OffsetPosition` is a standard class for positions * represented as offsets into a source ``document''. @@ -19,7 +21,7 @@ import scala.collection.mutable.ArrayBuffer * * @author Martin Odersky */ -case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends Position { +case class OffsetPosition(source: CharSequence, offset: Int) extends Position { /** An index that contains all line starts, including first line, and eof. 
*/ private lazy val index: Array[Int] = { @@ -87,6 +89,11 @@ case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends P * @author Tomáš Janoušek */ object OffsetPosition { - private lazy val indexCache = java.util.Collections.synchronizedMap( - new java.util.WeakHashMap[java.lang.CharSequence, Array[Int]]) + private lazy val indexCacheTL = + // not DynamicVariable as that would share the map from parent to child :-( + new ThreadLocal[java.util.Map[CharSequence, Array[Int]]] { + override def initialValue = new WeakHashMap[CharSequence, Array[Int]] + } + + private def indexCache = indexCacheTL.get } From ab6e080efbb8218fb73b094478e194528b10e1fd Mon Sep 17 00:00:00 2001 From: Tomas Janousek Date: Fri, 11 Sep 2015 22:40:52 +0200 Subject: [PATCH 3/3] Fix binary incompatibilities in object OffsetPosition --- src/main/scala/scala/util/parsing/input/OffsetPosition.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/scala/util/parsing/input/OffsetPosition.scala b/src/main/scala/scala/util/parsing/input/OffsetPosition.scala index 1449dbd4..23fd2c8e 100644 --- a/src/main/scala/scala/util/parsing/input/OffsetPosition.scala +++ b/src/main/scala/scala/util/parsing/input/OffsetPosition.scala @@ -88,7 +88,7 @@ case class OffsetPosition(source: CharSequence, offset: Int) extends Position { * * @author Tomáš Janoušek */ -object OffsetPosition { +object OffsetPosition extends scala.runtime.AbstractFunction2[CharSequence,Int,OffsetPosition] { private lazy val indexCacheTL = // not DynamicVariable as that would share the map from parent to child :-( new ThreadLocal[java.util.Map[CharSequence, Array[Int]]] {