Skip to content

Commit 91584dc

Browse files
committed
SI-7710 fix memory performance of RegexParsers in jdk7u6+
Starting with 1.7.0_06 [1], String.substring no longer reuses the internal char array of the String but makes a copy instead. Since we call subSequence twice for *every* input character, this results in horrible parse performance and GC pressure. With the benchmark from the (duplicate) ticket SI-8542, I get: BEFORE: parseAll(new StringReader(String)) For 100 items: 49 ms For 500 items: 97 ms For 1000 items: 155 ms For 5000 items: 113 ms For 10000 items: 188 ms For 50000 items: 1437 ms === parseAll(String) For 100 items: 4 ms For 500 items: 67 ms For 1000 items: 372 ms For 5000 items: 5693 ms For 10000 items: 23126 ms For 50000 items: 657665 ms AFTER: parseAll(new StringReader(String)) For 100 items: 43 ms For 500 items: 118 ms For 1000 items: 217 ms For 5000 items: 192 ms For 10000 items: 196 ms For 50000 items: 1424 ms === parseAll(String) For 100 items: 2 ms For 500 items: 8 ms For 1000 items: 16 ms For 5000 items: 79 ms For 10000 items: 161 ms For 50000 items: 636 ms [1] http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6924259
1 parent 01ef53c commit 91584dc

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

src/main/scala/scala/util/parsing/combinator/RegexParsers.scala

+2-2
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ trait RegexParsers extends Parsers {
7373
*/
7474
protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
7575
if (skipWhitespace)
76-
(whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match {
76+
(whiteSpace findPrefixMatchOf (new SubSequence(source, offset))) match {
7777
case Some(matched) => offset + matched.end
7878
case None => offset
7979
}
@@ -107,7 +107,7 @@ trait RegexParsers extends Parsers {
107107
val source = in.source
108108
val offset = in.offset
109109
val start = handleWhiteSpace(source, offset)
110-
(r findPrefixMatchOf (source.subSequence(start, source.length))) match {
110+
(r findPrefixMatchOf (new SubSequence(source, start))) match {
111111
case Some(matched) =>
112112
Success(source.subSequence(start, start + matched.end).toString,
113113
in.drop(start + matched.end - offset))
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
/* __ *\
2+
** ________ ___ / / ___ Scala API **
3+
** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL **
4+
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
5+
** /____/\___/_/ |_/____/_/ | | **
6+
** |/ **
7+
\* */
8+
9+
10+
package scala
11+
package util.parsing.combinator
12+
13+
// A shallow wrapper over another CharSequence (usually a String)
14+
//
15+
// See SI-7710: in jdk7u6 String.subSequence stopped sharing the char array of the original
16+
// string and began copying it.
17+
// RegexParsers calls subSequence twice per input character: that's a lot of array copying!
18+
private[combinator] class SubSequence(s: CharSequence, start: Int, val length: Int) extends CharSequence {
19+
def this(s: CharSequence, start: Int) = this(s, start, s.length - start)
20+
21+
def charAt(i: Int) =
22+
if (i >= 0 && i < length) s.charAt(start + i) else throw new IndexOutOfBoundsException(s"index: $i, length: $length")
23+
24+
def subSequence(_start: Int, _end: Int) = {
25+
if (_start < 0 || _end < 0 || _end > length || _start > _end)
26+
throw new IndexOutOfBoundsException(s"start: ${_start}, end: ${_end}, length: $length")
27+
28+
new SubSequence(s, start + _start, _end - _start)
29+
}
30+
31+
override def toString = s.subSequence(start, start + length).toString
32+
}

0 commit comments

Comments
 (0)