summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2011-12-12 06:40:18 -0800
committerPaul Phillips <paulp@improving.org>2011-12-12 13:12:28 -0800
commit4cfc633fc6cb2ab0f473c2e5141724017d444dc6 (patch)
tree2c2467b923f369aa61baa5550bcdb2894b51bc3c
parentd1e3b46f5bf58469bffb6f8e2ffebd932b990a5d (diff)
downloadscala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.tar.gz
scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.tar.bz2
scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.zip
Range.foreach optimization.
This makes code like 0 to 100 foreach (x += _) as fast as (often faster than, in fact) a while loop. See the comment in Range for the gory details. More investigation should be done regarding total impact on inlining behavior. Review by @odersky.
-rw-r--r--src/library/scala/collection/immutable/Range.scala89
-rw-r--r--test/benchmarks/src/scala/collection/immutable/range-bench.scala61
2 files changed, 138 insertions, 12 deletions
diff --git a/src/library/scala/collection/immutable/Range.scala b/src/library/scala/collection/immutable/Range.scala
index e891f8bec8..16d7e68dee 100644
--- a/src/library/scala/collection/immutable/Range.scala
+++ b/src/library/scala/collection/immutable/Range.scala
@@ -71,18 +71,6 @@ extends collection.AbstractSeq[Int]
def isInclusive = false
- @inline final override def foreach[@specialized(Unit) U](f: Int => U) {
- if (length > 0) {
- val last = this.last
- var i = start
- while (i != last) {
- f(i)
- i += step
- }
- f(i)
- }
- }
-
override def length: Int = numRangeElements
override lazy val last: Int =
if (length == 0) Nil.last
@@ -95,6 +83,83 @@ extends collection.AbstractSeq[Int]
if (idx < 0 || idx >= length) throw new IndexOutOfBoundsException(idx.toString)
locationAfterN(idx)
}
+
+ /** @note Making foreach run as fast as a while loop is a challenge.
+ * The key elements which I can observe making a difference are:
+ *
+ * - the inner loop should be as small as possible
+ * - the inner loop should be monomorphic
+ * - the inner loop should perform no boxing and no avoidable tests
+ *
+ * This is achieved by:
+ *
+ * - keeping initialization logic out of the inner loop
+ * - dispatching to custom variations based on initial conditions
+ * - tricking the compiler into always calling Function1#apply$mcVI$sp
+ *
+ * The last one is important and less than obvious. Even when foreach
+ * was specialized on Unit, only Int => Unit arguments benefited from it.
+ * Other function types would be accepted, but in the absence of full
+ * specialization the integer argument was boxed on every call. For example:
+ *
+ class A {
+ final def f(x: Int): Int = x + 1
+ // Calls Range.foreach, which calls Function1.apply
+ def g1 = 1 until 100 foreach { x => f(x) }
+ // Calls Range.foreach$mVc$sp, which calls Function1.apply$mcVI$sp
+ def g2 = 1 until 100 foreach { x => f(x) ; () }
+ }
+ *
+ * However! Since the result of the closure is always discarded, we
+ * simply cast it to Int => Unit, thereby executing the fast version.
+ * The seemingly looming ClassCastException can never arrive.
+ */
+ @inline final override def foreach[U](f: Int => U) {
+ if (step < 0) {
+ if (isInclusive) foreachDownIn(f.asInstanceOf[Int => Unit])
+ else foreachDownEx(f.asInstanceOf[Int => Unit])
+ }
+ else {
+ if (isInclusive) foreachUpIn(f.asInstanceOf[Int => Unit])
+ else foreachUpEx(f.asInstanceOf[Int => Unit])
+ }
+ }
+
+ /** !!! These methods must be public or they will not be inlined.
+ * But they are certainly not intended to be part of the API.
+ * This collision between inlining requirements and access semantics
+ * is highly unfortunate and must be resolved.
+ *
+ * Proposed band-aid: an @internal annotation.
+ */
+ @inline final def foreachDownIn(f: Int => Unit) {
+ var i = start
+ while (i >= end) {
+ f(i)
+ i += step
+ }
+ }
+ @inline final def foreachUpIn(f: Int => Unit) {
+ var i = start
+ while (i <= end) {
+ f(i)
+ i += step
+ }
+ }
+ @inline final def foreachDownEx(f: Int => Unit) {
+ var i = start
+ while (i > end) {
+ f(i)
+ i += step
+ }
+ }
+ @inline final def foreachUpEx(f: Int => Unit) {
+ var i = start
+ while (i < end) {
+ f(i)
+ i += step
+ }
+ }
/** Creates a new range containing the first `n` elements of this range.
*
diff --git a/test/benchmarks/src/scala/collection/immutable/range-bench.scala b/test/benchmarks/src/scala/collection/immutable/range-bench.scala
new file mode 100644
index 0000000000..e167ff04e8
--- /dev/null
+++ b/test/benchmarks/src/scala/collection/immutable/range-bench.scala
@@ -0,0 +1,61 @@
+package scala.collection.immutable
+package benchmarks
+
+object RangeTest {
+ // not inlined any more, needs investigation
+ //
+ // class XXS {
+ // private val array = Array.range(0, 100)
+ // def tst = { var sum = 0; for (i <- 0 until array.length) sum += array(i); sum }
+ // }
+
+ var x: Int = 0
+
+ def foreachSum(max: Int): Int = {
+ var sum = 0
+ 1 to max foreach (sum += _)
+ sum
+ }
+ def whileSum(max: Int) = {
+ var sum = 0
+ var num = 1
+ while (num <= max) {
+ sum += num
+ num += 1
+ }
+ sum
+ }
+
+ def show(max: Int, foreachNanos: Long, whileNanos: Long) {
+ val winner = if (foreachNanos < whileNanos) "foreachSum" else "whileSum"
+ val ratio = if (foreachNanos < whileNanos) foreachNanos.toDouble / whileNanos else whileNanos.toDouble / foreachNanos
+ println("1 to %d:, %12s wins, %.3f: foreach %.3f while %.3f".format(
+ max, winner, ratio,
+ foreachNanos.toDouble / 1000000L,
+ whileNanos.toDouble / 1000000L)
+ )
+ }
+
+ def run(max: Int) = {
+ val foreachFirst = util.Random.nextBoolean
+ val t1 = System.nanoTime
+ x = if (foreachFirst) foreachSum(max) else whileSum(max)
+ val t2 = System.nanoTime
+ x = if (foreachFirst) whileSum(max) else foreachSum(max)
+ val t3 = System.nanoTime
+
+ val foreachNanos = if (foreachFirst) t2 - t1 else t3 - t2
+ val whileNanos = if (foreachFirst) t3 - t2 else t2 - t1
+ show(max, foreachNanos, whileNanos)
+ }
+
+ def main(args: Array[String]): Unit = {
+ var max = if (args.isEmpty) 100 else args(0).toInt
+ while (max > 0) {
+ run(max)
+ run(max)
+ run(max)
+ max += (max / 7)
+ }
+ }
+}