summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAntoine Gourlay <antoine@gourlay.fr>2014-04-28 19:21:00 +0200
committerAntoine Gourlay <antoine@gourlay.fr>2014-08-12 16:18:07 +0200
commitfceae7084cc196e37bb8cf9efa96b994c0d73738 (patch)
treee03a7099e10e250a4208e2d132a22550114cd003
parent300db2a1e3eefc2a6ed379c870bc7da42a26e69a (diff)
downloadscala-fceae7084cc196e37bb8cf9efa96b994c0d73738.tar.gz
scala-fceae7084cc196e37bb8cf9efa96b994c0d73738.tar.bz2
scala-fceae7084cc196e37bb8cf9efa96b994c0d73738.zip
[backport] SI-7710 fix memory performance of RegexParsers in jdk7u6+
Backport of scala/scala-parser-combinators@91584dc. --- Starting with 1.7.0_06 [1], String.substring no longer reuses the internal char array of the String but makes a copy instead. Since we call subSequence twice for *every* input character, this results in horrible parse performance and GC. With the benchmark from the (duplicate) ticket SI-8542, I get: BEFORE: parseAll(new StringReader(String)) For 100 items: 49 ms For 500 items: 97 ms For 1000 items: 155 ms For 5000 items: 113 ms For 10000 items: 188 ms For 50000 items: 1437 ms === parseAll(String) For 100 items: 4 ms For 500 items: 67 ms For 1000 items: 372 ms For 5000 items: 5693 ms For 10000 items: 23126 ms For 50000 items: 657665 ms AFTER: parseAll(new StringReader(String)) For 100 items: 43 ms For 500 items: 118 ms For 1000 items: 217 ms For 5000 items: 192 ms For 10000 items: 196 ms For 50000 items: 1424 ms === parseAll(String) For 100 items: 2 ms For 500 items: 8 ms For 1000 items: 16 ms For 5000 items: 79 ms For 10000 items: 161 ms For 50000 items: 636 ms [1] http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6924259
-rw-r--r--bincompat-forward.whitelist.conf6
-rw-r--r--src/library/scala/util/parsing/combinator/RegexParsers.scala4
-rw-r--r--src/library/scala/util/parsing/combinator/SubSequence.scala32
3 files changed, 39 insertions, 3 deletions
diff --git a/bincompat-forward.whitelist.conf b/bincompat-forward.whitelist.conf
index 7879ecfd84..d8a91f3ae8 100644
--- a/bincompat-forward.whitelist.conf
+++ b/bincompat-forward.whitelist.conf
@@ -177,7 +177,11 @@ filter {
{
matchName="scala.reflect.runtime.JavaMirrors#JavaMirror.scala$reflect$runtime$JavaMirrors$JavaMirror$$followStatic"
problemName=MissingMethodProblem
+ },
+ {
+ # only accessible from util.parsing.combinator package
+ matchName="scala.util.parsing.combinator.SubSequence"
+ problemName=MissingClassProblem
}
-
]
}
diff --git a/src/library/scala/util/parsing/combinator/RegexParsers.scala b/src/library/scala/util/parsing/combinator/RegexParsers.scala
index d17d0cac8d..5f2c07c2ef 100644
--- a/src/library/scala/util/parsing/combinator/RegexParsers.scala
+++ b/src/library/scala/util/parsing/combinator/RegexParsers.scala
@@ -72,7 +72,7 @@ trait RegexParsers extends Parsers {
*/
protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
if (skipWhitespace)
- (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match {
+ (whiteSpace findPrefixMatchOf (new SubSequence(source, offset))) match {
case Some(matched) => offset + matched.end
case None => offset
}
@@ -106,7 +106,7 @@ trait RegexParsers extends Parsers {
val source = in.source
val offset = in.offset
val start = handleWhiteSpace(source, offset)
- (r findPrefixMatchOf (source.subSequence(start, source.length))) match {
+ (r findPrefixMatchOf (new SubSequence(source, start))) match {
case Some(matched) =>
Success(source.subSequence(start, start + matched.end).toString,
in.drop(start + matched.end - offset))
diff --git a/src/library/scala/util/parsing/combinator/SubSequence.scala b/src/library/scala/util/parsing/combinator/SubSequence.scala
new file mode 100644
index 0000000000..79c8acac0f
--- /dev/null
+++ b/src/library/scala/util/parsing/combinator/SubSequence.scala
@@ -0,0 +1,32 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package util.parsing.combinator
+
+// A shallow, non-copying view over another CharSequence (usually a String): it exposes the
+// `length` characters of `s` starting at index `start`.
+// See SI-7710: in jdk7u6 String.subSequence stopped sharing the char array of the original
+// string and began copying it.
+// RegexParsers calls subSequence twice per input character: that's a lot of array copying!
+private[combinator] class SubSequence(s: CharSequence, start: Int, val length: Int) extends CharSequence {
+ def this(s: CharSequence, start: Int) = this(s, start, s.length - start) // view from `start` to the end of `s`
+
+ def charAt(i: Int) = // `i` is relative to this view and is checked against `length`, not against `s`
+ if (i >= 0 && i < length) s.charAt(start + i) else throw new IndexOutOfBoundsException(s"index: $i, length: $length")
+
+ def subSequence(_start: Int, _end: Int) = { // both bounds are relative to this view
+ if (_start < 0 || _end < 0 || _end > length || _start > _end)
+ throw new IndexOutOfBoundsException(s"start: ${_start}, end: ${_end}, length: $length")
+
+ new SubSequence(s, start + _start, _end - _start) // wraps the original `s` again, so views never nest
+ }
+
+ override def toString = s.subSequence(start, start + length).toString // materializes the viewed range as a String
+}