diff options
author | Antoine Gourlay <antoine@gourlay.fr> | 2014-04-28 19:21:00 +0200 |
---|---|---|
committer | Antoine Gourlay <antoine@gourlay.fr> | 2014-08-12 16:18:07 +0200 |
commit | fceae7084cc196e37bb8cf9efa96b994c0d73738 (patch) | |
tree | e03a7099e10e250a4208e2d132a22550114cd003 /src | |
parent | 300db2a1e3eefc2a6ed379c870bc7da42a26e69a (diff) | |
download | scala-fceae7084cc196e37bb8cf9efa96b994c0d73738.tar.gz scala-fceae7084cc196e37bb8cf9efa96b994c0d73738.tar.bz2 scala-fceae7084cc196e37bb8cf9efa96b994c0d73738.zip |
[backport] SI-7710 fix memory performance of RegexParsers in jdk7u6+
Backport of scala/scala-parser-combinators@91584dc.
---
Starting with 1.7.0_06 [1], String.substring no longer reuses the internal
char array of the String but make a copy instead. Since we call
subSequence twice for *every* input character, this results in horrible
parse performance and GC.
With the benchmark from the (duplicate) ticket SI-8542, I get:
BEFORE:
parseAll(new StringReader(String))
For 100 items: 49 ms
For 500 items: 97 ms
For 1000 items: 155 ms
For 5000 items: 113 ms
For 10000 items: 188 ms
For 50000 items: 1437 ms
===
parseAll(String)
For 100 items: 4 ms
For 500 items: 67 ms
For 1000 items: 372 ms
For 5000 items: 5693 ms
For 10000 items: 23126 ms
For 50000 items: 657665 ms
AFTER:
parseAll(new StringReader(String))
For 100 items: 43 ms
For 500 items: 118 ms
For 1000 items: 217 ms
For 5000 items: 192 ms
For 10000 items: 196 ms
For 50000 items: 1424 ms
===
parseAll(String)
For 100 items: 2 ms
For 500 items: 8 ms
For 1000 items: 16 ms
For 5000 items: 79 ms
For 10000 items: 161 ms
For 50000 items: 636 ms
[1] http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6924259
Diffstat (limited to 'src')
-rw-r--r-- | src/library/scala/util/parsing/combinator/RegexParsers.scala | 4 | ||||
-rw-r--r-- | src/library/scala/util/parsing/combinator/SubSequence.scala | 32 |
2 files changed, 34 insertions, 2 deletions
diff --git a/src/library/scala/util/parsing/combinator/RegexParsers.scala b/src/library/scala/util/parsing/combinator/RegexParsers.scala index d17d0cac8d..5f2c07c2ef 100644 --- a/src/library/scala/util/parsing/combinator/RegexParsers.scala +++ b/src/library/scala/util/parsing/combinator/RegexParsers.scala @@ -72,7 +72,7 @@ trait RegexParsers extends Parsers { */ protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = if (skipWhitespace) - (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match { + (whiteSpace findPrefixMatchOf (new SubSequence(source, offset))) match { case Some(matched) => offset + matched.end case None => offset } @@ -106,7 +106,7 @@ trait RegexParsers extends Parsers { val source = in.source val offset = in.offset val start = handleWhiteSpace(source, offset) - (r findPrefixMatchOf (source.subSequence(start, source.length))) match { + (r findPrefixMatchOf (new SubSequence(source, start))) match { case Some(matched) => Success(source.subSequence(start, start + matched.end).toString, in.drop(start + matched.end - offset)) diff --git a/src/library/scala/util/parsing/combinator/SubSequence.scala b/src/library/scala/util/parsing/combinator/SubSequence.scala new file mode 100644 index 0000000000..79c8acac0f --- /dev/null +++ b/src/library/scala/util/parsing/combinator/SubSequence.scala @@ -0,0 +1,32 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.combinator + +// A shallow wrapper over another CharSequence (usually a String) +// +// See SI-7710: in jdk7u6 String.subSequence stopped sharing the char array of the original +// string and began copying it. +// RegexParsers calls subSequence twice per input character: that's a lot of array copying! +private[combinator] class SubSequence(s: CharSequence, start: Int, val length: Int) extends CharSequence { + def this(s: CharSequence, start: Int) = this(s, start, s.length - start) + + def charAt(i: Int) = + if (i >= 0 && i < length) s.charAt(start + i) else throw new IndexOutOfBoundsException(s"index: $i, length: $length") + + def subSequence(_start: Int, _end: Int) = { + if (_start < 0 || _end < 0 || _end > length || _start > _end) + throw new IndexOutOfBoundsException(s"start: ${_start}, end: ${_end}, length: $length") + + new SubSequence(s, start + _start, _end - _start) + } + + override def toString = s.subSequence(start, start + length).toString +} |