From 44971d104f4364a0ddaa4f05afc4cc61ee39cdf7 Mon Sep 17 00:00:00 2001 From: Sébastien Doeraene Date: Wed, 14 Sep 2016 17:06:53 +0200 Subject: Rewrite TraversableLike.stringPrefix not to blow up code size in Scala.js. The commit 30876fe2dd8cbe657a6cad6b11bbc34f10c29b36 changed `TraversableLike.stringPrefix` to report nicer results for inner classes and method-local classes. The changes included calls to `String.split()`, `Character.isDigit()` and `Character.isUpperCase()`. This was particularly bad for Scala.js, because those methods bring with them huge parts of the JDK (the `java.util.regex.*` implementation on the one hand, and the Unicode database on the other hand), which increased generated code size by 6 KB after minimification and gzip for an application that does not otherwise use those methods. This sudden increase is tracked in the Scala.js bug tracker at https://github.com/scala-js/scala-js/issues/2591. This commit rewrites `TraversableLike.stringPrefix` in a very imperative way, without resorting to those methods. The behavior is (mostly) preserved. There can be different results when `getClass().getName()` contains non-ASCII lowercase letters and/or digits. Those will now be recognized as user-defined instead of likely compiler-synthesized (which is a progression). There still are false positives for ASCII lowercase letters, which cause the `stringPrefix` to be empty (as before). Since the new implementation is imperative anyway, at least I made it not allocate anything but the result `String` in the common case where the result does not contain any `.`. --- src/library/scala/collection/TraversableLike.scala | 71 ++++++++++++++++++---- .../scala/collection/TraversableLikeTest.scala | 46 ++++++++++++-- 2 files changed, 100 insertions(+), 17 deletions(-) diff --git a/src/library/scala/collection/TraversableLike.scala b/src/library/scala/collection/TraversableLike.scala index be2f427ea4..c9482fe0a2 100644 --- a/src/library/scala/collection/TraversableLike.scala +++ b/src/library/scala/collection/TraversableLike.scala @@ -605,22 +605,69 @@ trait TraversableLike[+A, +Repr] extends Any * applied to this $coll. By default the string prefix is the * simple name of the collection class $coll. */ - def stringPrefix : String = { + def stringPrefix: String = { + /* This method is written in a style that avoids calling `String.split()` + * as well as methods of java.lang.Character that require the Unicode + * database information. This is mostly important for Scala.js, so that + * using the collection library does automatically bring java.util.regex.* + * and the Unicode database in the generated code. + * + * This algorithm has the additional benefit that it won't allocate + * anything except the result String in the common case, where the class + * is not an inner class (i.e., when the result contains no '.'). + */ val fqn = repr.getClass.getName - val cls = { - val idx1 = fqn.lastIndexOf('.' : Int) - if (idx1 != -1) fqn.substring(idx1 + 1) else fqn + var pos: Int = fqn.length - 1 + + // Skip trailing $'s + while (pos != -1 && fqn.charAt(pos) == '$') { + pos -= 1 + } + if (pos == -1 || fqn.charAt(pos) == '.') { + return "" } - val parts = cls.split('$') - val last = parts.length - 1 - parts.zipWithIndex.foldLeft("") { case (z, (s, i)) => - if (s.isEmpty) z - else if (i != last && s.forall(java.lang.Character.isDigit)) "" // drop prefix in method-local classes - else if (i == 0 || java.lang.Character.isUpperCase(s.charAt(0))) { - if (z.isEmpty) s else z + '.' + s + + var result: String = "" + while (true) { + // Invariant: if we enter the loop, there is a non-empty part + + // Look for the beginning of the part, remembering where was the last non-digit + val partEnd = pos + 1 + while (pos != -1 && fqn.charAt(pos) <= '9' && fqn.charAt(pos) >= '0') { + pos -= 1 + } + val lastNonDigit = pos + while (pos != -1 && fqn.charAt(pos) != '$' && fqn.charAt(pos) != '.') { + pos -= 1 + } + val partStart = pos + 1 + + // A non-last part which contains only digits marks a method-local part -> drop the prefix + if (pos == lastNonDigit && partEnd != fqn.length) { + return result + } + + // Skip to the next part, and determine whether we are the end + while (pos != -1 && fqn.charAt(pos) == '$') { + pos -= 1 + } + val atEnd = pos == -1 || fqn.charAt(pos) == '.' + + // Handle the actual content of the part (we ignore parts that are likely synthetic) + def isPartLikelySynthetic = { + val firstChar = fqn.charAt(partStart) + (firstChar > 'Z' && firstChar < 0x7f) || (firstChar < 'A') + } + if (atEnd || !isPartLikelySynthetic) { + val part = fqn.substring(partStart, partEnd) + result = if (result.isEmpty) part else part + '.' + result + if (atEnd) + return result } - else z } + + // dead code + result } /** Creates a non-strict view of this $coll. diff --git a/test/junit/scala/collection/TraversableLikeTest.scala b/test/junit/scala/collection/TraversableLikeTest.scala index 8588956016..f703abf3e4 100644 --- a/test/junit/scala/collection/TraversableLikeTest.scala +++ b/test/junit/scala/collection/TraversableLikeTest.scala @@ -5,29 +5,65 @@ import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.JUnit4 +object TraversableLikeTest { + abstract class FakeIndexedSeq[A] extends IndexedSeq[A] { + def apply(i: Int): A = ??? + def length: Int = 0 + } +} + @RunWith(classOf[JUnit4]) class TraversableLikeTest { + import TraversableLikeTest._ + // For test_SI9019; out here because as of test writing, putting this in a method would crash compiler class Baz[@specialized(Int) A]() extends IndexedSeq[A] { def apply(i: Int) = ??? def length: Int = 0 } - + @Test def test_SI9019 { object Foo { def mkBar = () => { - class Bar extends IndexedSeq[Int] { - def apply(i: Int) = ??? - def length: Int = 0 - } + class Bar extends FakeIndexedSeq[Int] new Bar } + + def mkFalsePositiveToSyntheticTest = () => { + /* A class whose name tarts with an ASCII lowercase letter. + * It will be a false positive to the synthetic-part test. + */ + class falsePositive extends FakeIndexedSeq[Int] + new falsePositive + } + + def mkFrench = () => { + // For non-French speakers, this means "strange class name" + class ÉtrangeNomDeClasse extends FakeIndexedSeq[Int] + new ÉtrangeNomDeClasse + } + + def mkFrenchLowercase = () => { + class étrangeNomDeClasseMinuscules extends FakeIndexedSeq[Int] + new étrangeNomDeClasseMinuscules + } } + val bar = Foo.mkBar() assertEquals("Bar", bar.stringPrefix) // Previously would have been outermost class, TraversableLikeTest val baz = new Baz[Int]() assertEquals("TraversableLikeTest.Baz", baz.stringPrefix) // Make sure we don't see specialization $mcI$sp stuff + + // The false positive unfortunately produces an empty stringPrefix + val falsePositive = Foo.mkFalsePositiveToSyntheticTest() + assertEquals("", falsePositive.stringPrefix) + + val french = Foo.mkFrench() + assertEquals("ÉtrangeNomDeClasse", french.stringPrefix) + + val frenchLowercase = Foo.mkFrenchLowercase() + assertEquals("étrangeNomDeClasseMinuscules", frenchLowercase.stringPrefix) } } -- cgit v1.2.3