Range.foreach optimization.

This makes code like 0 to 100 foreach (x += _) as fast as (often faster than, in fact) a while loop. See the comment in Range for the gory details. More investigation should be done regarding total impact on inlining behavior. Review by @odersky.
author: Paul Phillips <paulp@improving.org> 2011-12-12 06:40:18 -0800
committer: Paul Phillips <paulp@improving.org> 2011-12-12 13:12:28 -0800
commit: 4cfc633fc6cb2ab0f473c2e5141724017d444dc6 (patch)
tree: 2c2467b923f369aa61baa5550bcdb2894b51bc3c
parent: d1e3b46f5bf58469bffb6f8e2ffebd932b990a5d (diff)
download: scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.tar.gz
scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.tar.bz2
scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.zip
2 files changed, 138 insertions, 12 deletions
diff --git a/src/library/scala/collection/immutable/Range.scala b/src/library/scala/collection/immutable/Range.scala
index e891f8bec8..16d7e68dee 100644
--- a/src/library/scala/collection/immutable/Range.scala
+++ b/src/library/scala/collection/immutable/Range.scala
@@ -71,18 +71,6 @@ extends collection.AbstractSeq[Int]
 
   def isInclusive = false
 
-  @inline final override def foreach[@specialized(Unit) U](f: Int => U) {
-    if (length > 0) {
-      val last = this.last
-      var i = start
-      while (i != last) {
-        f(i)
-        i += step
-      }
-      f(i)
-    }
-  }
-
   override def length: Int = numRangeElements
   override lazy val last: Int =
     if (length == 0) Nil.last
@@ -95,6 +83,83 @@ extends collection.AbstractSeq[Int]
     if (idx < 0 || idx >= length) throw new IndexOutOfBoundsException(idx.toString)
     locationAfterN(idx)
   }
+  
+  /** @note Making foreach run as fast as a while loop is a challenge.
+   *  The key elements which I can observe making a difference are:
+   *
+   *   - the inner loop should be as small as possible
+   *   - the inner loop should be monomorphic
+   *   - the inner loop should perform no boxing and no avoidable tests
+   *
+   *  This is achieved by:
+   *
+   *   - keeping initialization logic out of the inner loop
+   *   - dispatching to custom variations based on initial conditions
+   *   - tricking the compiler into always calling Function1#apply$mcVI$sp
+   *
+   *  The last one is important and less than obvious.  Even when foreach
+   *  was specialized on Unit, only Int => Unit arguments benefited from it.
+   *  Other function types would be accepted, but in the absence of full
+   *  specialization the integer argument was boxed on every call.  For example:
+   *  
+       class A {
+         final def f(x: Int): Int = x + 1
+         // Calls Range.foreach, which calls Function1.apply
+         def g1 = 1 until 100 foreach { x => f(x) }
+         // Calls Range.foreach$mVc$sp, which calls Function1.apply$mcVI$sp
+         def g2 = 1 until 100 foreach { x => f(x) ; () }
+       }
+   *
+   *  However! Since the result of the closure is always discarded, we
+   *  simply cast it to Int => Unit, thereby executing the fast version.
+   *  The seemingly looming ClassCastException can never arrive.
+   */
+  @inline final override def foreach[U](f: Int => U) {
+    // The closure's result is always discarded, so treat it as Int => Unit
+    // once up front and hand the same reference to whichever loop applies.
+    val unitF = f.asInstanceOf[Int => Unit]
+    // Pick the loop variant from the two initial conditions: direction of
+    // travel (sign of step) and whether `end` itself is included.
+    if (step >= 0) {
+      if (isInclusive) foreachUpIn(unitF)
+      else foreachUpEx(unitF)
+    }
+    else if (isInclusive) foreachDownIn(unitF)
+    else foreachDownEx(unitF)
+  }
+
+  /** !!! These methods must be public or they will not be inlined.
+   *  But they are certainly not intended to be part of the API.
+   *  This collision between inlining requirements and access semantics
+   *  is highly unfortunate and must be resolved.
+   *
+   *  Proposed band-aid: an @internal annotation.
+   */
+  // Applies `f` to start, start + step, ... down to the last such value >= `end`.
+  // Only meaningful for step < 0; `foreach` dispatches here in that case.
+  @inline final def foreachDownIn(f: Int => Unit) {
+    if (start >= end) {
+      // Final element, computed once in Long arithmetic. The JVM remainder takes
+      // the sign of the dividend, so it is non-negative here even though step < 0.
+      val last = (end + (start.toLong - end) % step).toInt
+      var i = start
+      // Comparing against `last` (instead of `i >= end`) keeps `i += step` from
+      // wrapping below Int.MinValue and looping forever when `end` is near it.
+      while (i != last) {
+        f(i)
+        i += step
+      }
+      f(i)
+    }
+  }
+  // Applies `f` to start, start + step, ... up to the last such value <= `end`.
+  // Only meaningful for step > 0; `foreach` dispatches here when step >= 0.
+  // NOTE(review): a zero step now raises ArithmeticException (from `%`) instead
+  // of spinning forever; presumably Range rejects step == 0 elsewhere -- confirm.
+  @inline final def foreachUpIn(f: Int => Unit) {
+    if (start <= end) {
+      // Final element, computed once in Long arithmetic so `end - start`
+      // cannot overflow.
+      val last = (end - (end.toLong - start) % step).toInt
+      var i = start
+      // Comparing against `last` (instead of `i <= end`) keeps `i += step` from
+      // wrapping past Int.MaxValue and looping forever, e.g. for `0 to Int.MaxValue`.
+      while (i != last) {
+        f(i)
+        i += step
+      }
+      f(i)
+    }
+  }
+  // Applies `f` to start, start + step, ... down to the last such value > `end`.
+  // Only meaningful for step < 0; `foreach` dispatches here in that case.
+  @inline final def foreachDownEx(f: Int => Unit) {
+    if (start > end) {
+      // `end` is excluded, so the lowest admissible value is end + 1; this
+      // cannot overflow because end < start.
+      val bound = end + 1
+      // Final element, computed once in Long arithmetic. The JVM remainder takes
+      // the sign of the dividend, so it is non-negative here even though step < 0.
+      val last = (bound + (start.toLong - bound) % step).toInt
+      var i = start
+      // Comparing against `last` (instead of `i > end`) keeps `i += step` from
+      // wrapping below Int.MinValue and looping forever when `end` is near it.
+      while (i != last) {
+        f(i)
+        i += step
+      }
+      f(i)
+    }
+  }
+  // Applies `f` to start, start + step, ... up to the last such value < `end`.
+  // Only meaningful for step > 0; `foreach` dispatches here when step >= 0.
+  // NOTE(review): a zero step now raises ArithmeticException (from `%`) instead
+  // of spinning forever; presumably Range rejects step == 0 elsewhere -- confirm.
+  @inline final def foreachUpEx(f: Int => Unit) {
+    if (start < end) {
+      // `end` is excluded, so the highest admissible value is end - 1; this
+      // cannot underflow because end > start.
+      val bound = end - 1
+      // Final element, computed once in Long arithmetic so `bound - start`
+      // cannot overflow.
+      val last = (bound - (bound.toLong - start) % step).toInt
+      var i = start
+      // Comparing against `last` (instead of `i < end`) keeps `i += step` from
+      // wrapping past Int.MaxValue and looping forever, e.g. for
+      // `0 until Int.MaxValue by 2`.
+      while (i != last) {
+        f(i)
+        i += step
+      }
+      f(i)
+    }
+  }
 
   /** Creates a new range containing the first `n` elements of this range.
    *
diff --git a/test/benchmarks/src/scala/collection/immutable/range-bench.scala b/test/benchmarks/src/scala/collection/immutable/range-bench.scala
new file mode 100644
index 0000000000..e167ff04e8
--- /dev/null
+++ b/test/benchmarks/src/scala/collection/immutable/range-bench.scala
@@ -0,0 +1,61 @@
+package scala.collection.immutable
+package benchmarks
+
+/** Micro-benchmark that times summing `1 to max` with `Range.foreach`
+ *  against an equivalent hand-written `while` loop.
+ */
+object RangeTest {
+  // not inlined any more, needs investigation
+  // 
+  // class XXS {
+  //   private val array = Array.range(0, 100)
+  //   def tst = { var sum = 0; for (i <- 0 until array.length) sum += array(i); sum }
+  // }
+  
+  /** Receives every benchmark result in `run`; presumably a sink so the
+   *  sums are not optimized away -- confirm.
+   */
+  var x: Int = 0
+  
+  /** Sums the integers `1 to max` using `Range.foreach`. */
+  def foreachSum(max: Int): Int = {
+    var sum = 0
+    1 to max foreach (sum += _)
+    sum
+  }
+
+  /** Sums the integers `1 to max` using a plain `while` loop. */
+  def whileSum(max: Int): Int = {
+    var sum = 0
+    var num = 1
+    while (num <= max) {
+      sum += num
+      num += 1
+    }
+    sum
+  }
+
+  /** Prints one result line: the faster variant, the faster/slower time
+   *  ratio, and both timings converted from nanoseconds to milliseconds.
+   *
+   *  @param max          upper bound of the summed range
+   *  @param foreachNanos elapsed nanoseconds for `foreachSum`
+   *  @param whileNanos   elapsed nanoseconds for `whileSum`
+   */
+  def show(max: Int, foreachNanos: Long, whileNanos: Long): Unit = {
+    val foreachWins = foreachNanos < whileNanos
+    val winner = if (foreachWins) "foreachSum" else "whileSum"
+    val ratio =
+      if (foreachWins) foreachNanos.toDouble / whileNanos
+      else whileNanos.toDouble / foreachNanos
+    println("1 to %d:, %12s wins, %.3f:  foreach %.3f   while %.3f".format(
+      max, winner, ratio,
+      foreachNanos.toDouble / 1000000L,
+      whileNanos.toDouble / 1000000L)
+    )
+  }
+  
+  /** Times both variants once for the given size and prints the result.
+   *  Which variant runs first is chosen at random on every call.
+   */
+  def run(max: Int): Unit = {
+    val foreachFirst = util.Random.nextBoolean
+    val t1 = System.nanoTime
+    x = if (foreachFirst) foreachSum(max) else whileSum(max)
+    val t2 = System.nanoTime
+    x = if (foreachFirst) whileSum(max) else foreachSum(max)
+    val t3 = System.nanoTime
+    
+    val foreachNanos = if (foreachFirst) t2 - t1 else t3 - t2
+    val whileNanos = if (foreachFirst) t3 - t2 else t2 - t1
+    show(max, foreachNanos, whileNanos)
+  }
+
+  /** Next problem size: grows `max` by one seventh, but always by at least 1.
+   *  Without the floor of 1, sizes 1 through 6 would never advance because
+   *  `max / 7` is 0 for them.
+   */
+  def nextMax(max: Int): Int = max + math.max(1, max / 7)
+
+  /** Runs the benchmark three times per size, starting at `args(0)`
+   *  (default 100) and growing the size after each round.  The loop ends
+   *  when the size is no longer positive; since the size only grows, that
+   *  happens once the Int addition wraps around to a negative value.
+   */
+  def main(args: Array[String]): Unit = {
+    var max = if (args.isEmpty) 100 else args(0).toInt
+    while (max > 0) {
+      run(max)
+      run(max)
+      run(max)
+      max = nextMax(max)
+    }
+  }
+}
author	Paul Phillips <paulp@improving.org>	2011-12-12 06:40:18 -0800
committer	Paul Phillips <paulp@improving.org>	2011-12-12 13:12:28 -0800
commit	4cfc633fc6cb2ab0f473c2e5141724017d444dc6 (patch)
tree	2c2467b923f369aa61baa5550bcdb2894b51bc3c
parent	d1e3b46f5bf58469bffb6f8e2ffebd932b990a5d (diff)
download	scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.tar.gz scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.tar.bz2 scala-4cfc633fc6cb2ab0f473c2e5141724017d444dc6.zip