From 91584dc4df968869b72530abf8b56f7a281283b6 Mon Sep 17 00:00:00 2001
From: Antoine Gourlay <antoine@gourlay.fr>
Date: Mon, 28 Apr 2014 19:21:00 +0200
Subject: [PATCH] SI-7710 fix memory performance of RegexParsers in jdk7u6+

Starting with 1.7.0_06 [1], String.substring no longer reuses the internal
char array of the String but makes a copy instead. Since we call
subSequence twice for *every* input character, this results in horrible
parse performance and GC pressure.

With the benchmark from the (duplicate) ticket SI-8542, I get:

BEFORE:
    parseAll(new StringReader(String))
    For 100 items: 49 ms
    For 500 items: 97 ms
    For 1000 items: 155 ms
    For 5000 items: 113 ms
    For 10000 items: 188 ms
    For 50000 items: 1437 ms
    ===
    parseAll(String)
    For 100 items: 4 ms
    For 500 items: 67 ms
    For 1000 items: 372 ms
    For 5000 items: 5693 ms
    For 10000 items: 23126 ms
    For 50000 items: 657665 ms

AFTER:
    parseAll(new StringReader(String))
    For 100 items: 43 ms
    For 500 items: 118 ms
    For 1000 items: 217 ms
    For 5000 items: 192 ms
    For 10000 items: 196 ms
    For 50000 items: 1424 ms
    ===
    parseAll(String)
    For 100 items: 2 ms
    For 500 items: 8 ms
    For 1000 items: 16 ms
    For 5000 items: 79 ms
    For 10000 items: 161 ms
    For 50000 items: 636 ms

[1] http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6924259
---
 .../parsing/combinator/RegexParsers.scala     |  4 +--
 .../util/parsing/combinator/SubSequence.scala | 32 +++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 src/main/scala/scala/util/parsing/combinator/SubSequence.scala

diff --git a/src/main/scala/scala/util/parsing/combinator/RegexParsers.scala b/src/main/scala/scala/util/parsing/combinator/RegexParsers.scala
index 8ebbc573..f345fae0 100644
--- a/src/main/scala/scala/util/parsing/combinator/RegexParsers.scala
+++ b/src/main/scala/scala/util/parsing/combinator/RegexParsers.scala
@@ -73,7 +73,7 @@ trait RegexParsers extends Parsers {
    */
   protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
     if (skipWhitespace)
-      (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match {
+      (whiteSpace findPrefixMatchOf (new SubSequence(source, offset))) match {
         case Some(matched) => offset + matched.end
         case None => offset
       }
@@ -107,7 +107,7 @@ trait RegexParsers extends Parsers {
       val source = in.source
       val offset = in.offset
       val start = handleWhiteSpace(source, offset)
-      (r findPrefixMatchOf (source.subSequence(start, source.length))) match {
+      (r findPrefixMatchOf (new SubSequence(source, start))) match {
         case Some(matched) =>
           Success(source.subSequence(start, start + matched.end).toString,
                   in.drop(start + matched.end - offset))
diff --git a/src/main/scala/scala/util/parsing/combinator/SubSequence.scala b/src/main/scala/scala/util/parsing/combinator/SubSequence.scala
new file mode 100644
index 00000000..79c8acac
--- /dev/null
+++ b/src/main/scala/scala/util/parsing/combinator/SubSequence.scala
@@ -0,0 +1,32 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2006-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+package scala
+package util.parsing.combinator
+
+// A shallow, non-copying view over another CharSequence (usually a String): it exposes
+// `length` characters of `s` beginning at `start`, reading them through `s.charAt` on demand.
+// See SI-7710: in jdk7u6 String.subSequence stopped sharing the char array of the original
+// string and began copying it.
+// RegexParsers calls subSequence twice per input character: that's a lot of array copying!
+private[combinator] class SubSequence(s: CharSequence, start: Int, val length: Int) extends CharSequence {
+  def this(s: CharSequence, start: Int) = this(s, start, s.length - start) // view from `start` to the end of `s`
+
+  def charAt(i: Int) = // `i` is relative to this view, not to `s`
+    if (i >= 0 && i < length) s.charAt(start + i) else throw new IndexOutOfBoundsException(s"index: $i, length: $length")
+
+  def subSequence(_start: Int, _end: Int) = { // bounds are relative to this view; `_end` is exclusive
+    if (_start < 0 || _end < 0 || _end > length || _start > _end)
+      throw new IndexOutOfBoundsException(s"start: ${_start}, end: ${_end}, length: $length")
+
+    new SubSequence(s, start + _start, _end - _start) // wraps `s` itself again with translated offsets
+  }
+
+  override def toString = s.subSequence(start, start + length).toString // delegates to `s`, which may copy
+}